setwd("/Users/spagnuolo/Desktop/Reuter et el. 2025_final/Raw_data/Data_sets_Phage_titers")
## General packages required for analysis of all experiments
### Data organization and structuring
library(dplyr)
##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(tidyr)
library(car)
## Loading required package: carData
##
## Attaching package: 'car'
## The following object is masked from 'package:dplyr':
##
## recode
library(tidyverse)
## ── Attaching packages ─────────────────────────────────────── tidyverse 1.3.2
## ──
## ✔ ggplot2 3.4.4 ✔ purrr 1.0.2
## ✔ tibble 3.2.1 ✔ stringr 1.5.1
## ✔ readr 2.1.2 ✔ forcats 0.5.2
## ── Conflicts ────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag() masks stats::lag()
## ✖ car::recode() masks dplyr::recode()
## ✖ purrr::some() masks car::some()
### Data visualization
library(ggplot2)
library(gridExtra)
##
## Attaching package: 'gridExtra'
##
## The following object is masked from 'package:dplyr':
##
## combine
library(ggthemes)
library(plotrix)
library(scales) # for trans_beaks when axis is log-transformed
##
## Attaching package: 'scales'
##
## The following object is masked from 'package:plotrix':
##
## rescale
##
## The following object is masked from 'package:purrr':
##
## discard
##
## The following object is masked from 'package:readr':
##
## col_factor
library(ggpubr)
# Experiment 3: Adsorption constant analysis
library(multcomp) # Pairwise comparison
## Loading required package: mvtnorm
## Loading required package: survival
## Loading required package: TH.data
## Loading required package: MASS
##
## Attaching package: 'MASS'
##
## The following object is masked from 'package:dplyr':
##
## select
##
##
## Attaching package: 'TH.data'
##
## The following object is masked from 'package:MASS':
##
## geyser
library(lme4) # Linear mixed model
## Loading required package: Matrix
##
## Attaching package: 'Matrix'
##
## The following objects are masked from 'package:tidyr':
##
## expand, pack, unpack
# Experiment 4: Images analysis plaques
library (viridis)
## Loading required package: viridisLite
##
## Attaching package: 'viridis'
##
## The following object is masked from 'package:scales':
##
## viridis_pal
# Experiment 6: Decay analysis
library(stringr)
Code was used to monitor MOI input for each transfer.
READ ME Specification of columns in data set
Transfer (T): Individual three-hour transfers
Line (L): 5 selection lines started at the beginning of the experiment from five individual large plaques of phage phiX174 WT
CFU: colony-forming units after 2 hours of E. Coli C growth to get exponential overday culture, CFU count of T3 (644 CFU) was acquired by counting a quater of the plate (161 CFU) which was multiplied by a factor 4
Dilution_CFU: plate dilution used to plate the colonies counted in 100 µl
PFU: Plaque forming units counted on plaque assay plate (only done for transfer 5 of each passage)
Dilution_PFU: plate dilution used to plate the plaques counted in 100 µl
df<-read.csv("experiment_1_3_h_transfers.csv")
# df should have 28 observations of 6 variables
str(df) # structure of the columns
## 'data.frame': 28 obs. of 6 variables:
## $ Transfer : chr "T1" "T1" "T1" "T1" ...
## $ Line : chr "L1" "L2" "L3" "L4" ...
## $ CFU : int 76 76 76 76 76 98 98 98 98 98 ...
## $ Dilution_CFU: num 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 ...
## $ PFU : int 120 21 238 88 44 21 26 89 19 132 ...
## $ Dilution_PFU: num 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 1e+07 ...
summary(df) # summary of the columns
## Transfer Line CFU Dilution_CFU
## Length:28 Length:28 Min. : 76.0 Min. : 10000
## Class :character Class :character 1st Qu.: 98.0 1st Qu.:100000
## Mode :character Mode :character Median :150.0 Median :100000
## Mean :248.3 Mean : 83929
## 3rd Qu.:373.0 3rd Qu.:100000
## Max. :644.0 Max. :100000
## PFU Dilution_PFU
## Min. : 19.00 Min. : 10000000
## 1st Qu.: 30.50 1st Qu.: 10000000
## Median : 63.50 Median : 10000000
## Mean : 66.82 Mean : 19642857
## 3rd Qu.: 87.25 3rd Qu.: 10000000
## Max. :238.00 Max. :100000000
# Turn Transfer and Line into factors
df$Transfer=as.factor(df$Transfer)
df$Line=as.factor(df$Line)
# Calculate CFU per ml by multiplying CFU counts and Dilution factor and multiply by an additional factor 10 to get CFU per ml because 100 µl of the plating dilution "Dilution_CFU" was used for plating
df$CFU_ml=df$CFU*df$Dilution_CFU*10
# Calculate PFU per ml by multiplying PFU counts and Dilution factor and multiply by an additional factor 10 to get PFU per ml because 100 µl of the plating dilution "Dilution_PFU" was used for the plaque assay
df$PFU_ml=df$PFU*df$Dilution_PFU*10
# Bacteria concentration
a=ggplot(df,aes(Transfer,CFU_ml))+
geom_point()+ # dot plot
xlab("Transfer")+ # label of x-axis
ylab("E.coli C concentration (CFU/ml)")+ # label of y-axis
ggtitle("Bacteria concentration")+ # plot title
theme_bw() #plot layout
a
# Virus concentration for each selection line separately until first small plaques were detected (comments on plot b also apply to plots c-f)
## Selection line 1
b=ggplot(df[df$Line=="L1",],aes(Transfer,PFU_ml))+
geom_point(color="blue")+ # dot plot
scale_y_continuous(trans="log10", limits=c(1e+09,6e+010))+ # log transformed y-axis
xlab("Transfer")+ # label of x-axis
ylab("PhiX174 concentration (PFU/ml)")+ #label of y-axis
ggtitle("Selection line L1")+ # plot title
theme_bw() # plot layout
## Selection line 2
c=ggplot(df[df$Line=="L2",],aes(Transfer,PFU_ml))+
geom_point(color="purple")+
scale_y_continuous(trans="log10", limits=c(1e+09,6e+010))+
xlab("Transfer")+
ylab("PhiX174 concentration (PFU/ml)")+
ggtitle("Selection line L2")+
theme_bw()
## Selection line 3
d=ggplot(df[df$Line=="L3",],aes(Transfer,PFU_ml))+
geom_point(color="green")+
scale_y_continuous(trans="log10", limits=c(1e+09,6e+010))+
xlab("Transfer")+
ylab("PhiX174 concentration (PFU/ml)")+
ggtitle("Selection line L3")+
theme_bw()
## Selection line 4
e=ggplot(df[df$Line=="L4",],aes(Transfer,PFU_ml))+
geom_point(color="red")+
scale_y_continuous(trans="log10", limits=c(1e+09,6e+10))+
xlab("Transfer")+
ylab("PhiX174 concentration (PFU/ml)")+
ggtitle("Selection line L4")+
theme_bw()
## Selection line 5
f=ggplot(df[df$Line=="L5",],aes(Transfer,PFU_ml))+
geom_point(color="orange")+
scale_y_continuous(trans="log10", limits=c(1e+09,6e+010))+
xlab("Transfer")+
ylab("PhiX174 concentration (PFU/ml)")+
ggtitle("Selection line L5")+
theme_bw()
# Combine phage concentration plots for all selection lines in one plot
grid.arrange(b,c,d,e,f)
Code used to generate Fig. S2A in the manuscript
READ ME Specification of columns in data set
Line (L): 5 selection lines started at the beginning of the experiment from five individual large plaques of phage phiX174 WT
Genotype: Genotype of the small plaque mutant isolated from the 3-h regime, with the single-point mutation indicated as F[for F protein] and in the brackets the mutation in the protein with old amino acid, amoni acid position, and new amino acid
Transfer_number (T): Number of 3-h transfers required to isolate one isogenic, heritable small plaque
Total_PFU_ml: Total Phage concentation in PFU per ml for the transfer at which the first small plaques were observed
Small_PFU_ml: Estimated concentration of small plaques of the “Total_PFU_ml”
# Create data frame with number of transfers required to get first small plaques
df=read.csv("experiment_1_3_h_transfers_small_plaque_frequency.csv")
# df should have 5 observations of 5 variables
str(df) #structure of the dataset
## 'data.frame': 5 obs. of 5 variables:
## $ Line : chr "L1" "L2" "L3" "L4" ...
## $ Genotype : chr "F(G322D)" "F(G322D)" "F(M213I)" "F(S427*)" ...
## $ Transfer_number: int 6 7 5 4 6
## $ Total_PFU_ml : num 1.9e+10 8.8e+09 7.2e+09 5.8e+09 8.9e+09
## $ Small_PFU_ml : num 1e+09 1e+08 1e+08 1e+08 1e+08
# Convert Line and Genotype into factors
df$Genotype=as.factor(df$Genotype)
df$Line=as.factor(df$Line)
# Create a new column for the frequency of small plaques in %
df$Frequency=NA
# Calculate the frequency in %
df$Frequency=(df$Small_PFU_ml)/(df$Total_PFU_ml*0.01)
# show frequencies
print(df$Frequency)
## [1] 5.263158 1.136364 1.388889 1.724138 1.123596
# Code to get the x-ticks in two lines including selection line AND genotype
addline_format <- function(x,...){
gsub('\\s','\n',x)
}
# Visualize results
x=ggplot(df,aes(x=reorder(Line, -Transfer_number),y=Transfer_number, group=Genotype, fill=Genotype))+
geom_col()+ # bar chart
coord_flip()+ # flip x and y axis
scale_fill_manual(values=c("#c6d325","#CC97A7", "#ef7c00"))+ # fill colors based on genotype
scale_x_discrete(limits=c("L2","L1","L5","L3","L4"),
labels=addline_format(c("F(G322D) L2","F(G322D) L1","F(G322D) L5","F(M213I) L3","F(S427*) L4")))+ # change y-axis labeling (x command because axes are flipped)
annotate("text",x=5, y=1,
label = "1.72% small plaques", size = 5)+ # add frequency Line 4: 1.72%
annotate("text",x=4, y=1,
label = "1.39% small plaques", size = 5)+ # add frequency Line 3: 1.39%
annotate("text",x=3, y=1,
label = "1.12% small plaques", size = 5)+ # add frequency Line 5: 1.12%
annotate("text",x=2, y=1,
label = "5.26% small plaques", size = 5)+ # add frequency Line 1: 5.26%
annotate("text",x=1, y=1,
label = "1.14% small plaques", size = 5)+ # add frequency Line 2: 1.14%
ylab("Number of 3-h transfer")+ # label x axis (after flipping)
xlab("Genotype")+ # label y-axis (after flipping)
theme_bw()+ # plot layout
theme(legend.position="none")+ # not legend
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # font size
x
# Same image as high resolution jpeg file
# ggsave("mut_freq_3h.jpeg", plot = x, dpi = 600, width = 10, height = 6)
Code was used to monitor MOI input for each transfer.
READ ME Specification of columns in data set
Passage (P): describes 5 transfers done on one day including 4 30-minute transfers followed by 1 3-hour transfer
Transfer (T): Individual transfers done within one passage
Passage_ID: Individual ID for each single passage+transfer ranging from 1-50
Line (L): 5 selection lines started at the beginning of the experiment from five individual large plaques of phage phiX174 WT
CFU: colony-forming units after 2 hours of E. Coli C growth to get exponential overday culture
Dilution_CFU: plate dilution used to plate the colonies counted in 100µl
CFU_ml: colony-forming units per milliliter calculated by CFU* Dilution_CFU*10. Factor 10 for multiplication used because plating volume was 100 µl
PFU: Plaque forming units counted on plaque assay plate (only done for transfer 5 of each passage)
Dilution_PFU: plate dilution used to plate the plaques counted in 100µl
PFU_ml: Plaque forming units per milliliter calculated by PFU* Dilution_PFU*10. Factor 10 for multiplication used because plating volume was 100 µl
Spot_assay_dilution: Dilution of phages from transfers 1 to 4 at which a spot on a lawn of E. Coli C WT could be detected. (Only done for transfers 1 to 4 for each passage)
Output_phage_ml: Phage concentration in phages per milliliter calculated after each transfer. For transfers 1 to 4 by calculating Spot_assay_dilution*500. Factor 500 because 2 µl of phage lysate was plated. For transfer 5 concentration calculation as described under “PFU_ml”
-Inoculation_volume_ml: Volume of phage lysate put into each transfer. For transfer 2 to 5 on each passage day 100 µl was used. For the first transfer of each passage day the phage input volume was calculated to get an MOI input of ~0.1
Real_virus_concentration: real phage concentration put into each transfer based on Spot assay and PFU counts
Real_MOI: real MOI input calculation for each transfer based on real virus concentration and bacteria concentration for each transfer
df=read.csv("experiment_2_30_min_transfer.csv")
# df contains 275 observations of 15 variables
str(df) # structure of columns
## 'data.frame': 275 obs. of 15 variables:
## $ Passage : chr "P1" "P1" "P1" "P1" ...
## $ Transfer : chr "T1" "T1" "T1" "T1" ...
## $ Passage_ID : int 1 1 1 1 1 2 2 2 2 2 ...
## $ Line : chr "L1" "L2" "L3" "L4" ...
## $ CFU : int 212 212 212 212 212 1260 1260 1260 1260 1260 ...
## $ Dilution_CFU : num 1e+05 1e+05 1e+05 1e+05 1e+05 1e+04 1e+04 1e+04 1e+04 1e+04 ...
## $ CFU_ml : num 2.12e+08 2.12e+08 2.12e+08 2.12e+08 2.12e+08 1.26e+08 1.26e+08 1.26e+08 1.26e+08 1.26e+08 ...
## $ PFU : int NA NA NA NA NA NA NA NA NA NA ...
## $ Dilution_PFU : num NA NA NA NA NA NA NA NA NA NA ...
## $ PFU_ml : num NA NA NA NA NA NA NA NA NA NA ...
## $ Spot_assay_dilution : num 1e+05 1e+05 1e+05 1e+04 1e+04 1e+05 1e+07 1e+05 1e+05 1e+07 ...
## $ Output_phage_ml : num 5e+07 5e+07 5e+07 5e+06 5e+06 5e+07 5e+09 5e+07 5e+07 5e+09 ...
## $ Inoculation_volume_ml : num 0.0135 0.0135 0.0135 0.0135 0.0135 0.1 0.1 0.1 0.1 0.1 ...
## $ Real_virus_concentration: num 9990000 9990000 9990000 9990000 9990000 1000000 1000000 1000000 100000 100000 ...
## $ Real_MOI : num 0.0471 0.0471 0.0471 0.0471 0.0471 ...
summary(df) # summary of the columns
## Passage Transfer Passage_ID Line
## Length:275 Length:275 Min. : 1.00 Length:275
## Class :character Class :character 1st Qu.:10.00 Class :character
## Mode :character Mode :character Median :23.00 Mode :character
## Mean :23.91
## 3rd Qu.:37.00
## Max. :50.00
##
## CFU Dilution_CFU CFU_ml PFU
## Min. : 88.0 Min. : 10000 Min. : 68000000 Min. : 16.0
## 1st Qu.: 215.0 1st Qu.: 10000 1st Qu.:112000000 1st Qu.: 44.0
## Median :1116.0 Median : 10000 Median :140000000 Median : 98.5
## Mean : 985.2 Mean : 36182 Mean :142541818 Mean :103.1
## 3rd Qu.:1464.0 3rd Qu.:100000 3rd Qu.:170000000 3rd Qu.:145.8
## Max. :1944.0 Max. :100000 Max. :249000000 Max. :255.0
## NA's :225
## Dilution_PFU PFU_ml Spot_assay_dilution Output_phage_ml
## Min. :1.0e+07 Min. :7.300e+09 Min. : 10000 Min. :5.000e+06
## 1st Qu.:1.0e+07 1st Qu.:1.578e+10 1st Qu.: 100000 1st Qu.:5.000e+07
## Median :1.0e+08 Median :3.200e+10 Median : 100000 Median :5.000e+08
## Mean :6.4e+07 Mean :5.099e+10 Mean : 717746 Mean :9.984e+09
## 3rd Qu.:1.0e+08 3rd Qu.:5.675e+10 3rd Qu.: 1000000 3rd Qu.:5.000e+08
## Max. :1.0e+08 Max. :2.550e+11 Max. :10000000 Max. :2.550e+11
## NA's :225 NA's :225 NA's :62 NA's :12
## Inoculation_volume_ml Real_virus_concentration Real_MOI
## Min. :0.0001955 Min. : 100000 Min. :0.000558
## 1st Qu.:0.1000000 1st Qu.: 1000000 1st Qu.:0.007081
## Median :0.1000000 Median : 9990000 Median :0.044467
## Mean :0.0810217 Mean : 8947948 Mean :0.064543
## 3rd Qu.:0.1000000 3rd Qu.: 10000000 3rd Qu.:0.095495
## Max. :0.1000000 Max. :100000000 Max. :0.896057
## NA's :7 NA's :7
# Passage P2 failed to produce detectable infectious particles after Transfer 3. Therefore all 5 transfers of P2 were excluded from analysis. P2 was repeated as "P2b" for all five transfers
df<-df[df$Passage!="P2",]
# Set Block, Passage, Transfer, Line to factors
df$Passage<-as.factor(df$Passage)
df$Transfer<-as.factor(df$Transfer)
df$Line<-as.factor(df$Line)
# Set color pallet
cbPalette <- c("#E69F00", "#56B4E9", "#009E73", "#F0E442", "#0072B2", "#D55E00", "#999999")
# Set level order of passages for plotting
df$Passage <- factor(df$Passage , levels=c("P1", "P2b", "P3", "P4", "P5", "P6", "P7", "P8", "P9", "P10"))
# Plot bacteria concentration for each transfer
a<-ggplot(df,aes(Passage,CFU_ml, color=Transfer))+
geom_point()+ # dot plot
scale_color_manual(values=cbPalette)+ # color settings
xlab("Passage day")+ # label x-axis
ylab("CFU/ml")+ # label y-axis
theme_base()+ # plot layout
theme(text = element_text(size = 9),
axis.title = element_text(size =9 ),
axis.text.x = element_text(angle=90,size = 9),
legend.position = "bottom") # font size and legend position
a
# Visualize all MOI's in one plot (e.g. violin plot with different lines)
ggplot(df,aes(Line, Real_MOI, fill=Line))+
geom_violin()+ # violin plot
scale_y_continuous(trans="log10")+ # log transformation of y-axis
theme_bw() # plot layout
Note –> Phages after each 5th transfer T5 is accurate (after 3h incubation), was isolated using chloroform, the other 4x30 min titers were determined after filtration using a spot test, which was too inaccurate. To quantify correct free phage titer after 30-min transfers, see Experiment 6: Free phage titer quantification
# Phage concentration for individual selection lines
## Subset dataframe to only transfer 5 for each passage
df1<-df[df$Transfer=="T5",]
# Plot phage titers for each selection line in an individual plot (comments on plot h apply also to plots i-l)
## Line 1
h<-ggplot(df1[df1$Line=="L1",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
geom_point(position=position_jitterdodge())+ # dot plot with jitter to avoid point overlapping
scale_color_manual(values = cbPalette)+ # color setting
scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+ # order of passages
xlab("Passage day")+ # label x-axis
ylab("Virus concentration (viruses/ml)")+ # label y-axis
ggtitle(("Selection line 1"))+ # plot title
theme_base()+ # plot layout
theme(text = element_text(size = 9),
axis.title = element_text(size =9 ),
axis.text.x = element_text(angle=90,size = 9),
legend.position = "none") # font size and legend position
h
## Line 2
i<-ggplot(df1[df1$Line=="L2",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
geom_point(position=position_jitterdodge())+
scale_color_manual(values = cbPalette)+
scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+
xlab("Passage day")+
ylab("Virus concentration (viruses/ml)")+
ggtitle(("Selection line 2"))+
theme_base()+
theme(text = element_text(size = 9),
axis.title = element_text(size =9 ),
axis.text.x = element_text(angle=90,size = 9),
legend.position = "none")
i
# Line 3
j<-ggplot(df1[df1$Line=="L3",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
geom_point(position=position_jitterdodge())+
scale_color_manual(values = cbPalette)+
scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+
xlab("Passage day")+
ylab("Virus concentration (viruses/ml)")+
ggtitle(("Selection line 3"))+
theme_base()+
theme(text = element_text(size = 9),
axis.title = element_text(size =9 ),
axis.text.x = element_text(angle=90,size = 9),
legend.position = "none")
j
# Line 4
k<-ggplot(df1[df1$Line=="L4",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
geom_point(position=position_jitterdodge())+
scale_color_manual(values = cbPalette)+
scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+
xlab("Passage day")+
ylab("Virus concentration (viruses/ml)")+
ggtitle(("Selection line 4"))+
theme_base()+
theme(text = element_text(size = 9),
axis.title = element_text(size =9 ),
axis.text.x = element_text(angle=90,size = 9),
legend.position = "none")
k
# Line 5
l<-ggplot(df1[df1$Line=="L5",],aes(Passage,Output_phage_ml, group=Transfer, color=Transfer))+
geom_point(position=position_jitterdodge())+
scale_color_manual(values = cbPalette)+
scale_x_discrete(limits = c("P1","P2b","P3","P4","P5","P6","P7","P8","P9","P10"))+
xlab("Passage day")+
ylab("Virus concentration (viruses/ml)")+
ggtitle(("Selection line 5"))+
theme_base()+
theme(text = element_text(size = 9),
axis.title = element_text(size =9 ),
axis.text.x = element_text(angle=90,size = 9),
legend.position = "none")
l
# Combined plots of all lines
grid.arrange(h,i,j,k,l)
Code used to generate Fig. S3A in the manuscript
READ ME Specification of columns in data set
Genotype: Phage genotypes for which adsorption assay was done, wild type (WT), small plaque mutants F(G322D) and F(S427*) both evolved during experiment 1 “3-hour transfers”, and large-plaque mutant F(T101A) evolved in experiment 2 “30-min transfers”
Run: Repeated experimental runs, different number of runs for each phage genotype - Wild type: Runs 1,2,3,4,5,6,7 - T101A: Runs 5,6,7, - G322D: Runs 1,2,3,4,5,6,7 - S427*: Runs 1.,2,3,4,5,7
Sampling_time_min: Sampling time point in minutes during the adsorption assay from time point 2 minutes (initiation of adsorption) to 6 minutes post adsorption in 2 minute time intervals.
PFU_1: Plaque-forming unit counts (PFU) for first plating of triplicate plating for lysate
PFU_2: Plaque-forming unit counts (PFU) for second plating of triplicate plating for lysate
PFU_3: Plaque-forming unit counts (PFU) for third plating of triplicate plating for lysate
Median_PFU: Median PFU counts from triplicate plating PFU_1, PFU_2, PFU_3
Predilution: After sampling at the adsorption time point 0 min to 6 min 500 µl of sample was diluted in 4.5 ml of unsupplemented LB cooled on ice to prevent the completion of the infection cycle after adsorption and secondary adsorptions. The dilution factor for all time points except t-1 (stock) for 10x dilution
Dilution_plating: Plating dilution used to plate 100 µl of phage lysate isolated after the adsorption assay
Dilution_in_ml: Dilution step to calculate phage concentration in PFU/ml. Factor is 10x because 100 µl of phage lysate were used to plating plaques
PFU_ml: Phage concentration in Plaque-forming units per ml (PFU/ml), calculated as Median_PFU * Predilution * Dilution_plating * Dilution_in_ml
CFU: Colony-forming-units of wild type E. coli C at the beginning of the adsorption period
Dilution_CFU: Plating dilution used to plate CFUs
Bacteria_CFU_ml: Bacteria concentration in colony-forming units per ml CFU/ml) at the start of the adsorption period. Calculated as counting colony-forming units (CFU) * Dilution_CFU * 10 (because 100 µl of Dilution_CFU were used for plating)
df<-read.csv("experiment_3_adsorption_assay.csv")
# -> 69 observations of 14 variables
str(df)# structure of columns
## 'data.frame': 69 obs. of 14 variables:
## $ Genotype : chr "G322D" "G322D" "G322D" "G322D" ...
## $ Run : int 1 1 1 2 2 2 3 3 3 4 ...
## $ Sampling_time_min: int 2 4 6 2 4 6 2 4 6 2 ...
## $ PFU_1 : int 51 18 22 68 152 36 36 33 42 191 ...
## $ PFU_2 : int 38 18 20 81 146 29 32 68 52 151 ...
## $ PFU_3 : int 49 24 18 72 96 37 15 40 71 129 ...
## $ Median_PFU : int 49 18 20 72 146 36 32 40 52 151 ...
## $ Predilution : int 10 10 10 10 10 10 10 10 10 10 ...
## $ Dilution_plating : num 1000 100 10 1000 100 10 1000 100 10 1000 ...
## $ Dilution_in_ml : int 10 10 10 10 10 10 10 10 10 10 ...
## $ PFU_ml : num 4900000 180000 20000 7200000 1460000 36000 3200000 400000 52000 15100000 ...
## $ CFU : int 202 202 202 121 121 121 251 251 251 221 ...
## $ Dilution_CFU : num 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 ...
## $ CFU_ml : num 2.02e+08 2.02e+08 2.02e+08 1.21e+08 1.21e+08 1.21e+08 2.51e+08 2.51e+08 2.51e+08 2.21e+08 ...
summary(df) # summary of columns
## Genotype Run Sampling_time_min PFU_1
## Length:69 Min. :1.000 Min. :2 Min. : 17.00
## Class :character 1st Qu.:2.000 1st Qu.:2 1st Qu.: 33.00
## Mode :character Median :4.000 Median :4 Median : 57.00
## Mean :4.174 Mean :4 Mean : 71.97
## 3rd Qu.:6.000 3rd Qu.:6 3rd Qu.: 94.00
## Max. :7.000 Max. :6 Max. :261.00
## PFU_2 PFU_3 Median_PFU Predilution
## Min. : 13.00 Min. : 14.00 Min. : 14.00 Min. :10
## 1st Qu.: 37.00 1st Qu.: 40.00 1st Qu.: 39.00 1st Qu.:10
## Median : 61.00 Median : 61.00 Median : 58.00 Median :10
## Mean : 72.94 Mean : 74.13 Mean : 72.75 Mean :10
## 3rd Qu.: 95.00 3rd Qu.: 96.00 3rd Qu.: 91.00 3rd Qu.:10
## Max. :312.00 Max. :367.00 Max. :261.00 Max. :10
## Dilution_plating Dilution_in_ml PFU_ml CFU
## Min. : 1.0 Min. :10 Min. : 2800 Min. :121.0
## 1st Qu.: 10.0 1st Qu.:10 1st Qu.: 47000 1st Qu.:196.0
## Median : 100.0 Median :10 Median : 390000 Median :221.0
## Mean : 190.4 Mean :10 Mean : 1229058 Mean :235.2
## 3rd Qu.: 100.0 3rd Qu.:10 3rd Qu.: 1130000 3rd Qu.:267.0
## Max. :1000.0 Max. :10 Max. :15100000 Max. :391.0
## Dilution_CFU CFU_ml
## Min. :1e+05 Min. :121000000
## 1st Qu.:1e+05 1st Qu.:196000000
## Median :1e+05 Median :221000000
## Mean :1e+05 Mean :235217391
## 3rd Qu.:1e+05 3rd Qu.:267000000
## Max. :1e+05 Max. :391000000
# Convert Genotype and Run into factors
df$Genotype=as.factor(df$Genotype)
df$Run=as.factor(df$Run)
# Log transform free phage concentrations
df$log_PFU_ml=log(df$PFU_ml)
# Comments of plot a also apply to plots b-g
# Run 1
a=ggplot(df[df$Run=="1",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+ # dot plot
scale_y_continuous(trans="log10")+ # log transformation of y-axis
scale_color_manual(values=c("#c6d325","#ef7c00","#29485d"))+ # set color palette
theme_bw() # plot layout
a
# Run 2
b=ggplot(df[df$Run=="2",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+
scale_y_continuous(trans="log10")+
scale_color_manual(values=c("#c6d325","#ef7c00","#29485d"))+
theme_bw()
b
# Run 3
c=ggplot(df[df$Run=="3",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+
scale_y_continuous(trans="log10")+
scale_color_manual(values=c("#c6d325","#ef7c00","#29485d"))+
theme_bw()
c
# Run 4
d=ggplot(df[df$Run=="4",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+
scale_y_continuous(trans="log10")+
scale_color_manual(values=c("#c6d325","#ef7c00","#29485d"))+
theme_bw()
d
# Run 5
e=ggplot(df[df$Run=="5",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+
scale_y_continuous(trans="log10")+
scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#29485d"))+
theme_bw()
e
# Run 6
f=ggplot(df[df$Run=="6",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+
scale_y_continuous(trans="log10")+
scale_color_manual(values=c("#c6d325","#ae017e","#29485d"))+
theme_bw()
f
# Run 7
g=ggplot(df[df$Run=="7",],aes(Sampling_time_min,PFU_ml, group=Genotype, color=Genotype))+
geom_point()+
# geom_smooth(method="lm", linetype="dashed", se=F)+
scale_y_continuous(trans="log10")+
scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#29485d"))+
theme_bw()
g
# Creat a data set to record slopes and R^2 values for each genotype and run, using subset df to time point 2 as a template
dfslope=df[df$Sampling_time_min==2,]
#-> dfslope should contain 23 observations of 6 variables
# Subset slope data set to only columns Run, Genotype and CFU_ml (Bacteria concentration needed for adsorption constant calculation)
dfslope=dfslope[,c(2,1,14)]
# Create unique identifier combining Genotype and Run
dfslope$ID=paste(dfslope$Run,dfslope$Genotype,sep="_")
# Create columns to record slope and R^2 values
dfslope$slope=NA
dfslope$R2=NA # multiple R^2
# Each model represents an individual experimental run
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==1,])
summary(a) #slope=-0.99138, R^2= 0.9989
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 1, ])
##
## Residuals:
## 49 50 51
## 0.03783 -0.07566 0.03783
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.32905 0.14155 122.42 0.0052 **
## Sampling_time_min -0.99138 0.03276 -30.26 0.0210 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.09267 on 1 degrees of freedom
## Multiple R-squared: 0.9989, Adjusted R-squared: 0.9978
## F-statistic: 915.6 on 1 and 1 DF, p-value: 0.02103
dfslope[dfslope$ID=="1_WT",]$slope=- 0.99138
dfslope[dfslope$ID=="1_WT",]$R2= 0.9989
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==2,])
summary(a) #slope=-0.83966 , R^2=0.9997
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 2, ])
##
## Residuals:
## 52 53 54
## 0.01778 -0.03557 0.01778
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.40857 0.06654 261.62 0.00243 **
## Sampling_time_min -0.83966 0.01540 -54.52 0.01168 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.04356 on 1 degrees of freedom
## Multiple R-squared: 0.9997, Adjusted R-squared: 0.9993
## F-statistic: 2972 on 1 and 1 DF, p-value: 0.01168
dfslope[dfslope$ID=="2_WT",]$slope=- 0.83966
dfslope[dfslope$ID=="2_WT",]$R2= 0.9997
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==3,])
summary(a) #slope=-0.78892, R^2=0.9984
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 3, ])
##
## Residuals:
## 55 56 57
## -0.03669 0.07337 -0.03669
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.26296 0.13727 118.47 0.00537 **
## Sampling_time_min -0.78892 0.03177 -24.83 0.02562 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08986 on 1 degrees of freedom
## Multiple R-squared: 0.9984, Adjusted R-squared: 0.9968
## F-statistic: 616.6 on 1 and 1 DF, p-value: 0.02562
dfslope[dfslope$ID=="3_WT",]$slope=- 0.78892
dfslope[dfslope$ID=="3_WT",]$R2= 0.9984
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==4,])
summary(a) #slope=-0.9439, R^2=0.944
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 4, ])
##
## Residuals:
## 58 59 60
## 0.2654 -0.5308 0.2654
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.1557 0.9931 16.268 0.0391 *
## Sampling_time_min -0.9439 0.2299 -4.106 0.1521
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.6501 on 1 degrees of freedom
## Multiple R-squared: 0.944, Adjusted R-squared: 0.888
## F-statistic: 16.86 on 1 and 1 DF, p-value: 0.1521
dfslope[dfslope$ID=="4_WT",]$slope=- 0.9439
dfslope[dfslope$ID=="4_WT",]$R2= 0.944
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==5,])
summary(a) #slope=-0.7034, R^2 0.8892
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 5, ])
##
## Residuals:
## 61 62 63
## 0.2867 -0.5734 0.2867
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.3377 1.0728 13.365 0.0475 *
## Sampling_time_min -0.7034 0.2483 -2.833 0.2161
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7023 on 1 degrees of freedom
## Multiple R-squared: 0.8892, Adjusted R-squared: 0.7784
## F-statistic: 8.024 on 1 and 1 DF, p-value: 0.2161
dfslope[dfslope$ID=="5_WT",]$slope=- 0.7034
dfslope[dfslope$ID=="5_WT",]$R2= 0.8892
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==6,])
summary(a) #slope=-0.9422, R^2=0.9631
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 6, ])
##
## Residuals:
## 64 65 66
## 0.2128 -0.4257 0.2128
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.3928 0.7964 19.328 0.0329 *
## Sampling_time_min -0.9422 0.1843 -5.112 0.1230
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.5214 on 1 degrees of freedom
## Multiple R-squared: 0.9631, Adjusted R-squared: 0.9263
## F-statistic: 26.13 on 1 and 1 DF, p-value: 0.123
dfslope[dfslope$ID=="6_WT",]$slope=- 0.9422
dfslope[dfslope$ID=="6_WT",]$R2= 0.9631
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="WT" &df$Run==7,])
summary(a) #slope=-0.9246, slope=0.9389
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "WT" & df$Run == 7, ])
##
## Residuals:
## 67 68 69
## 0.2723 -0.5446 0.2723
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.4412 1.0188 15.156 0.0419 *
## Sampling_time_min -0.9246 0.2358 -3.921 0.1590
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.667 on 1 degrees of freedom
## Multiple R-squared: 0.9389, Adjusted R-squared: 0.8779
## F-statistic: 15.37 on 1 and 1 DF, p-value: 0.159
dfslope[dfslope$ID=="7_WT",]$slope=- 0.9246
dfslope[dfslope$ID=="7_WT",]$R2= 0.9389
# Each model represents an individual experimental run
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==1,])
summary(a) #slope=-1.3753, R^2=0.9867
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 1, ])
##
## Residuals:
## 1 2 3
## 0.1845 -0.3689 0.1845
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.9709 0.6902 26.037 0.0244 *
## Sampling_time_min -1.3753 0.1598 -8.609 0.0736 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4519 on 1 degrees of freedom
## Multiple R-squared: 0.9867, Adjusted R-squared: 0.9734
## F-statistic: 74.11 on 1 and 1 DF, p-value: 0.07362
dfslope[dfslope$ID=="1_G322D",]$slope=- 1.3753
dfslope[dfslope$ID=="1_G322D",]$R2= 0.9867
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==2,])
summary(a) #slope=-1.3246 , R^2= 0.9499
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 2, ])
##
## Residuals:
## 4 5 6
## -0.3512 0.7023 -0.3512
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.7899 1.3140 14.300 0.0444 *
## Sampling_time_min -1.3246 0.3041 -4.355 0.1437
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8602 on 1 degrees of freedom
## Multiple R-squared: 0.9499, Adjusted R-squared: 0.8998
## F-statistic: 18.97 on 1 and 1 DF, p-value: 0.1437
dfslope[dfslope$ID=="2_G322D",]$slope=- 1.3246
dfslope[dfslope$ID=="2_G322D",]$R2= 0.9499
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==3,])
summary(a) #slope-1.029916, R^21
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 3, ])
##
## Residuals:
## 7 8 9
## 0.006537 -0.013074 0.006537
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.031956 0.024458 696.4 0.000914 ***
## Sampling_time_min -1.029916 0.005661 -181.9 0.003499 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.01601 on 1 degrees of freedom
## Multiple R-squared: 1, Adjusted R-squared: 0.9999
## F-statistic: 3.31e+04 on 1 and 1 DF, p-value: 0.003499
dfslope[dfslope$ID=="3_G322D",]$slope=- 1.029916
dfslope[dfslope$ID=="3_G322D",]$R2= 1
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==4,])
summary(a) #slope=-1.3544, R^2=0.888
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 4, ])
##
## Residuals:
## 10 11 12
## 0.5555 -1.1110 0.5555
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.6836 2.0785 8.989 0.0705 .
## Sampling_time_min -1.3544 0.4811 -2.815 0.2173
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 1.361 on 1 degrees of freedom
## Multiple R-squared: 0.888, Adjusted R-squared: 0.7759
## F-statistic: 7.926 on 1 and 1 DF, p-value: 0.2173
dfslope[dfslope$ID=="4_G322D",]$slope=- 1.3544
dfslope[dfslope$ID=="4_G322D",]$R2= 0.888
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==5,])
summary(a) #slope=-1.3334, R^2= 0.9876
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 5, ])
##
## Residuals:
## 13 14 15
## 0.1728 -0.3455 0.1728
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.7647 0.6464 24.389 0.0261 *
## Sampling_time_min -1.3334 0.1496 -8.912 0.0711 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.4232 on 1 degrees of freedom
## Multiple R-squared: 0.9876, Adjusted R-squared: 0.9751
## F-statistic: 79.43 on 1 and 1 DF, p-value: 0.07114
dfslope[dfslope$ID=="5_G322D",]$slope=- 1.3334
dfslope[dfslope$ID=="5_G322D",]$R2= 0.9876
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==6,])
summary(a) #slope=-1.3502, R^2=0.9594
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 6, ])
##
## Residuals:
## 16 17 18
## 0.3208 -0.6415 0.3208
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.3173 1.2002 13.60 0.0467 *
## Sampling_time_min -1.3502 0.2778 -4.86 0.1292
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.7857 on 1 degrees of freedom
## Multiple R-squared: 0.9594, Adjusted R-squared: 0.9188
## F-statistic: 23.62 on 1 and 1 DF, p-value: 0.1292
dfslope[dfslope$ID=="6_G322D",]$slope=- 1.3502
dfslope[dfslope$ID=="6_G322D",]$R2= 0.9594
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="G322D" &df$Run==7,])
summary(a) #slope=-1.2309, R^2=0.9919
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "G322D" & df$Run == 7, ])
##
## Residuals:
## 19 20 21
## 0.1288 -0.2575 0.1288
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.0207 0.4818 33.25 0.0191 *
## Sampling_time_min -1.2309 0.1115 -11.04 0.0575 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3154 on 1 degrees of freedom
## Multiple R-squared: 0.9919, Adjusted R-squared: 0.9837
## F-statistic: 121.8 on 1 and 1 DF, p-value: 0.05752
dfslope[dfslope$ID=="7_G322D",]$slope=- 1.2309
dfslope[dfslope$ID=="7_G322D",]$R2= 0.9919
# Each model represents an individual experimental run
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==1,])
summary(a) #slope-1.2174, R^2= 0.9414
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "S427*" & df$Run == 1, ])
##
## Residuals:
## 22 23 24
## 0.3508 -0.7016 0.3508
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.1233 1.3125 13.046 0.0487 *
## Sampling_time_min -1.2174 0.3038 -4.007 0.1557
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8592 on 1 degrees of freedom
## Multiple R-squared: 0.9414, Adjusted R-squared: 0.8828
## F-statistic: 16.06 on 1 and 1 DF, p-value: 0.1557
dfslope[dfslope$ID=="1_S427*",]$slope=- 1.2174
dfslope[dfslope$ID=="1_S427*",]$R2= 0.9414
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==2,])
summary(a) #slope=-1.03485 , R^2=0.9986
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "S427*" & df$Run == 2, ])
##
## Residuals:
## 25 26 27
## 0.04534 -0.09069 0.04534
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 17.61483 0.16966 103.83 0.00613 **
## Sampling_time_min -1.03485 0.03927 -26.35 0.02415 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1111 on 1 degrees of freedom
## Multiple R-squared: 0.9986, Adjusted R-squared: 0.9971
## F-statistic: 694.5 on 1 and 1 DF, p-value: 0.02415
dfslope[dfslope$ID=="2_S427*",]$slope=- 1.03485
dfslope[dfslope$ID=="2_S427*",]$R2= 0.9986
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==3,])
summary(a) #slope=-1.3484, R^2=0.9893
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "S427*" & df$Run == 3, ])
##
## Residuals:
## 28 29 30
## 0.1621 -0.3242 0.1621
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 18.0550 0.6066 29.765 0.0214 *
## Sampling_time_min -1.3484 0.1404 -9.604 0.0660 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3971 on 1 degrees of freedom
## Multiple R-squared: 0.9893, Adjusted R-squared: 0.9786
## F-statistic: 92.24 on 1 and 1 DF, p-value: 0.06605
dfslope[dfslope$ID=="3_S427*",]$slope=- 1.3484
dfslope[dfslope$ID=="3_S427*",]$R2= 0.9893
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==4,])
summary(a) #slope=-0.9737, R^2= 0.9814
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "S427*" & df$Run == 4, ])
##
## Residuals:
## 31 32 33
## 0.1548 -0.3095 0.1548
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.0635 0.5791 26.011 0.0245 *
## Sampling_time_min -0.9737 0.1340 -7.265 0.0871 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.3791 on 1 degrees of freedom
## Multiple R-squared: 0.9814, Adjusted R-squared: 0.9628
## F-statistic: 52.77 on 1 and 1 DF, p-value: 0.08709
dfslope[dfslope$ID=="4_S427*",]$slope=- 0.9737
dfslope[dfslope$ID=="4_S427*",]$R2= 0.9814
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==5,])
summary(a) #slope=-1.3133, R^2= 0.9513
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "S427*" & df$Run == 5, ])
##
## Residuals:
## 34 35 36
## 0.3433 -0.6865 0.3433
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 16.5109 1.2844 12.855 0.0494 *
## Sampling_time_min -1.3133 0.2973 -4.418 0.1417
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8408 on 1 degrees of freedom
## Multiple R-squared: 0.9513, Adjusted R-squared: 0.9025
## F-statistic: 19.52 on 1 and 1 DF, p-value: 0.1417
dfslope[dfslope$ID=="5_S427*",]$slope=- 1.3133
dfslope[dfslope$ID=="5_S427*",]$R2= 0.9513
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="S427*" &df$Run==7,])
summary(a) #slope=-1.2369, R^2=0.9416
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "S427*" & df$Run == 7, ])
##
## Residuals:
## 37 38 39
## 0.3557 -0.7115 0.3557
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 15.5624 1.3311 11.692 0.0543 .
## Sampling_time_min -1.2369 0.3081 -4.015 0.1554
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.8714 on 1 degrees of freedom
## Multiple R-squared: 0.9416, Adjusted R-squared: 0.8832
## F-statistic: 16.12 on 1 and 1 DF, p-value: 0.1554
dfslope[dfslope$ID=="7_S427*",]$slope=- 1.2369
dfslope[dfslope$ID=="7_S427*",]$R2= 0.9416
# Each model represents an individual experimental run
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="T101A" &df$Run==5,])
summary(a) #slope=-0.28577, R^2= 0.9814
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "T101A" & df$Run == 5, ])
##
## Residuals:
## 40 41 42
## 0.04540 -0.09081 0.04540
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.66372 0.16989 86.314 0.00738 **
## Sampling_time_min -0.28577 0.03932 -7.267 0.08705 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1112 on 1 degrees of freedom
## Multiple R-squared: 0.9814, Adjusted R-squared: 0.9628
## F-statistic: 52.82 on 1 and 1 DF, p-value: 0.08705
dfslope[dfslope$ID=="5_T101A",]$slope=- 0.28577
dfslope[dfslope$ID=="5_T101A",]$R2= 0.9814
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="T101A" &df$Run==6,])
summary(a) #slope=-0.34008, R^2= 0.993
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "T101A" & df$Run == 6, ])
##
## Residuals:
## 43 44 45
## -0.03298 0.06595 -0.03298
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.94736 0.12339 121.14 0.00526 **
## Sampling_time_min -0.34008 0.02856 -11.91 0.05334 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.08078 on 1 degrees of freedom
## Multiple R-squared: 0.993, Adjusted R-squared: 0.986
## F-statistic: 141.8 on 1 and 1 DF, p-value: 0.05334
dfslope[dfslope$ID=="6_T101A",]$slope=- 0.34008
dfslope[dfslope$ID=="6_T101A",]$R2= 0.993
a=lm(formula=log_PFU_ml~Sampling_time_min,data=df[df$Genotype=="T101A" &df$Run==7,])
summary(a) #slope=-0.20034, R^2= 0.9875
##
## Call:
## lm(formula = log_PFU_ml ~ Sampling_time_min, data = df[df$Genotype ==
## "T101A" & df$Run == 7, ])
##
## Residuals:
## 46 47 48
## -0.02607 0.05214 -0.02607
##
## Coefficients:
## Estimate Std. Error t value Pr(>|t|)
## (Intercept) 14.68695 0.09755 150.562 0.00423 **
## Sampling_time_min -0.20034 0.02258 -8.873 0.07144 .
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06386 on 1 degrees of freedom
## Multiple R-squared: 0.9875, Adjusted R-squared: 0.9749
## F-statistic: 78.74 on 1 and 1 DF, p-value: 0.07144
dfslope[dfslope$ID=="7_T101A",]$slope=- 0.20034
dfslope[dfslope$ID=="7_T101A",]$R2= 0.9875
summary(dfslope)
## Run Genotype CFU_ml ID slope
## 1:3 G322D:7 Min. :121000000 Length:23 Min. :-1.3753
## 2:3 S427*:6 1st Qu.:196000000 Class :character 1st Qu.:-1.3190
## 3:3 T101A:3 Median :221000000 Mode :character Median :-1.0299
## 4:3 WT :7 Mean :235217391 Mean :-1.0036
## 5:4 3rd Qu.:267000000 3rd Qu.:-0.8821
## 6:3 Max. :391000000 Max. :-0.2003
## 7:4
## R2
## Min. :0.8880
## 1st Qu.:0.9469
## Median :0.9814
## Mean :0.9679
## 3rd Qu.:0.9925
## Max. :1.0000
##
# min: 0.89
# max: 1.00
Formula for calculating k: k (ml^-1 min^-1)= -slope/Bacteria concentration (CFU_ml)
# Calculate adsorption rate constant k
dfslope$k=-(dfslope$slope)/(dfslope$CFU_ml)
#write.csv(dfslope,"experiment_3_slope_analysis.csv")
## WT
mean(dfslope[dfslope$Genotype=="WT",]$k) #4.146119e-09
## [1] 4.146119e-09
std.error(dfslope[dfslope$Genotype=="WT",]$k) #5.982199e-10
## [1] 5.982199e-10
## T101A
mean(dfslope[dfslope$Genotype=="T101A",]$k) # 9.874041e-10
## [1] 9.874041e-10
std.error(dfslope[dfslope$Genotype=="T101A",]$k) #6.043767e-11
## [1] 6.043767e-11
## G322D
mean(dfslope[dfslope$Genotype=="G322D",]$k) # 6.102084e-09
## [1] 6.102084e-09
std.error(dfslope[dfslope$Genotype=="G322D",]$k) #9.289692e-10
## [1] 9.289692e-10
## S427*
mean(dfslope[dfslope$Genotype=="S427*",]$k) #5.931108e-09
## [1] 5.931108e-09
std.error(dfslope[dfslope$Genotype=="S427*",]$k) #5.968958e-10
## [1] 5.968958e-10
# Test for normal distribution
shapiro.test(dfslope$k) # passed: W = 0.9632, p-value = 0.5309
##
## Shapiro-Wilk normality test
##
## data: dfslope$k
## W = 0.9632, p-value = 0.5309
# Homogeneity of variances
leveneTest(k ~ Genotype, data = dfslope) # passed: group DF= 3, group sample= 19, F-value= 1.2369 p=0.324
## Levene's Test for Homogeneity of Variance (center = median)
## Df F value Pr(>F)
## group 3 1.2369 0.324
## 19
# Linear mixed effects model with Genotype as fixed effect and run as random effect
## Full model
m1<- lmer(k~Genotype +(1|Run) ,data= dfslope )
## Null model
m2<- lmer(k~1 +(1|Run),data=dfslope)
# Model comparison using anova to test if including genotype into the model significantly contributes to explaining adsorption constant variation
anova(m1,m2)
## refitting model(s) with ML (instead of REML)
## Data: dfslope
## Models:
## m2: k ~ 1 + (1 | Run)
## m1: k ~ Genotype + (1 | Run)
## npar AIC BIC logLik deviance Chisq Df Pr(>Chisq)
## m2 3 -847.21 -843.80 426.60 -853.21
## m1 6 -870.97 -864.16 441.49 -882.97 29.764 3 1.548e-06 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
Results of Anova Analysis of Variance Table Models: m2: k ~
1 + (1 | Run) m1: k ~ Genotype + (1 | Run) npar AIC BIC logLik deviance
Chisq Df Pr(>Chisq)
m2 3 -847.21 -843.80 426.60 -853.21
m1 6 -870.97 -864.16 441.49 -882.97 29.764 3 1.548e-06 ***
summary(m1)
## Linear mixed model fit by REML ['lmerMod']
## Formula: k ~ Genotype + (1 | Run)
## Data: dfslope
##
## REML criterion at convergence: -718.6
##
## Scaled residuals:
## Min 1Q Median 3Q Max
## -1.23170 -0.50637 0.04028 0.48679 1.95181
##
## Random effects:
## Groups Name Variance Std.Dev.
## Run (Intercept) 2.822e-18 1.680e-09
## Residual 6.623e-19 8.138e-10
## Number of obs: 23, groups: Run, 7
##
## Fixed effects:
## Estimate Std. Error t value
## (Intercept) 6.102e-09 7.055e-10 8.649
## GenotypeS427* -4.454e-10 4.586e-10 -0.971
## GenotypeT101A -4.292e-09 5.983e-10 -7.173
## GenotypeWT -1.956e-09 4.350e-10 -4.496
##
## Correlation of Fixed Effects:
## (Intr) GS427* GT101A
## GentypS427* -0.292
## GentypT101A -0.224 0.313
## GenotypeWT -0.308 0.474 0.364
Results of Model m1 summary Linear mixed model fit by REML [‘lmerMod’] Formula: k ~ Genotype + (1 | Run) Data: dfslope
REML criterion at convergence: -718.6
Scaled residuals: Min 1Q Median 3Q Max -1.23170 -0.50637 0.04028 0.48679 1.95181
Random effects: Groups Name Variance Std.Dev. Run (Intercept) 2.822e-18 1.680e-09 Residual 6.623e-19 8.138e-10 Number of obs: 23, groups: Run, 7
Fixed effects: Estimate Std. Error t value (Intercept) 6.102e-09 7.055e-10 8.649 GenotypeS427* -4.454e-10 4.586e-10 -0.971 GenotypeT101A -4.292e-09 5.983e-10 -7.173 GenotypeWT -1.956e-09 4.350e-10 -4.496
Correlation of Fixed Effects: (Intr) GS427* GT101A GentypS427*
-0.292
GentypT101A -0.224 0.313
GenotypeWT -0.308 0.474 0.364
# Using Tukey-HSD as Post-Hoc test for mutliple comparison
summary(glht(m1,linfct=mcp(Genotype="Tukey")),test=adjusted("holm"))
##
## Simultaneous Tests for General Linear Hypotheses
##
## Multiple Comparisons of Means: Tukey Contrasts
##
##
## Fit: lmer(formula = k ~ Genotype + (1 | Run), data = dfslope)
##
## Linear Hypotheses:
## Estimate Std. Error z value Pr(>|z|)
## S427* - G322D == 0 -4.454e-10 4.586e-10 -0.971 0.331457
## T101A - G322D == 0 -4.292e-09 5.983e-10 -7.173 4.40e-12 ***
## WT - G322D == 0 -1.956e-09 4.350e-10 -4.496 2.76e-05 ***
## T101A - S427* == 0 -3.846e-09 6.296e-10 -6.109 5.02e-09 ***
## WT - S427* == 0 -1.511e-09 4.586e-10 -3.294 0.001977 **
## WT - T101A == 0 2.336e-09 5.983e-10 3.904 0.000284 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
## (Adjusted p values reported -- holm method)
Results of Post-Hoc Multiple comparison testing Simultaneous Tests for General Linear Hypotheses
Multiple Comparisons of Means: Tukey Contrasts
Fit: lmer(formula = k ~ Genotype + (1 | Run), data = dfslope)
Linear Hypotheses: Estimate Std. Error z value Pr(>|z|)
S427* - G322D == 0 -4.454e-10 4.586e-10 -0.971 0.331457
T101A - G322D == 0 -4.292e-09 5.983e-10 -7.173 4.40e-12 WT
- G322D == 0 -1.956e-09 4.350e-10 -4.496 2.76e-05 T101A -
S427* == 0 -3.846e-09 6.296e-10 -6.109 5.02e-09 WT -
S427 == 0 -1.511e-09 4.586e-10 -3.294 0.001977 WT - T101A
== 0 2.336e-09 5.983e-10 3.904 0.000284 ***
Signif. codes: 0 ‘’ 0.001 ‘’ 0.01 ‘’ 0.05 ‘.’ 0.1 ‘ ’ 1 (Adjusted p values reported – holm method)
#output_file <- "adsorption_statistics_model_checking.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)
## Plot layout
par(mfrow=c(1,2))
## qqplot
qqPlot(residuals(m1))
## 4 16
## 2 6
## residuals vs fitted values plot
plot(residuals(m1)~fitted(m1))
abline(a=0, b=0, col="red")
#dev.off()
(Figure Fig. 3A)
# Rearrange order of genotypes in ascending order
dfslope$Genotype <- factor(dfslope$Genotype , levels=c("T101A", "WT","S427*", "G322D"))
# visualize results of adsorption rate constant
a=ggplot(dfslope,aes(Genotype,k,fill=as.factor(Genotype)))+
geom_boxplot(alpha=0.8)+ #boxplot
geom_point()+ # individual adsorption constants as dots
annotate(geom="text", x=1.2, y=1.15e-09, label="a",
color="black", size=5)+ # significance letter F(T101A)
annotate(geom="text", x=2.2, y=5.25e-09, label="b",
color="black", size=5)+ # significance letter Ancestor
annotate(geom="text", x=3.2, y=6.8e-09, label="c",
color="black", size=5)+ # significance letter F(S427*)
annotate(geom="text", x=4.2, y=7.1e-09, label="c",
color="black", size=5)+ # significance letter F(G322D)
scale_y_continuous(trans="log10",
labels=trans_format("log10", math_format(10^.x)),
breaks=c(1e-09,1e-08))+ # log transformation of y-axis
scale_x_discrete(breaks=c("T101A","WT","S427*","G322D"),
labels=c("F(T101A)","Ancestor","F(S427*)","F(G322D)"))+ # change label of mutants on the x-axis
scale_fill_manual(values=c("#ae017e","#29485d","#ef7c00","#c6d325"))+ # color setting
labs(x = "Genotype",
y = expression(paste("Adsorption constant (ml"^"-1","min"^"-1 ",")")))+ #labels of x and y axis
theme_bw()+ # plot layout
theme(legend.position = "none")+ # no legend
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # font size
a
# Save plot at 600 dpi resolution
#ggsave("adsorption.jpeg", plot = a, dpi = 600, width = 8, height = 6)
Code used to generate Fig. 3B and Fig. S4 in the manuscript
READ ME Specification of columns in data set
Block: Experimental block during which the phage isolate was evolved - 0: wild type non-evolved - A: 30-minute transfer experiment - 1: 3-hour transfer experiment
Phenotype: Lysis plaque phenotype forming large plaques (LP) or small plaques (SP)
Line: Selection line phage was evolved in - WT: wild type non-evolved - L3: 30-minute transfer experiment - L1 and L4: 3-hour transfer experiment
Image: Image of individual plaque assay plate, for each phage isolate, four images labeled A to D were taken
Colony: Individual ID of a lysis plaque on a plaque assay plate selected for image analysis. Plaques were manually selected checking for correct recognition of the mask (i.e. intact area, non-connected, individual plaques)
Area: Lysis plaque area measured by the algorithm of Octavio
Distance: Distance to the plaque periphery in pixels (px)
Intensity: plaque intensity in pixels (px) at individual distance position within the plaque
Intensity_SD: standard deviation of pixel intensity
ID: unique identifier combining block, phenotype, and line
df=read.csv("experiment_4_plaque_image_analysis.csv")
# df consists of 52687 observations and 10 variables
str(df) # structure of columns
## 'data.frame': 52687 obs. of 10 variables:
## $ Block : chr "0" "0" "0" "0" ...
## $ Phenotype : chr "LP" "LP" "LP" "LP" ...
## $ Line : chr "WT" "WT" "WT" "WT" ...
## $ Image : chr "A" "A" "A" "A" ...
## $ Colony : int 8 8 8 8 8 8 8 8 8 8 ...
## $ Area : num 0.555 0.555 0.555 0.555 0.555 ...
## $ Distance : num 1 1.41 2 2.24 2.83 ...
## $ Intensity : num 100 97.2 95.5 94.6 94 ...
## $ Intensity_SD: num 3.1 1.99 1.81 1.65 1.4 ...
## $ ID : chr "0_LP_WT" "0_LP_WT" "0_LP_WT" "0_LP_WT" ...
summary(df) #summary of columns
## Block Phenotype Line Image
## Length:52687 Length:52687 Length:52687 Length:52687
## Class :character Class :character Class :character Class :character
## Mode :character Mode :character Mode :character Mode :character
##
##
##
## Colony Area Distance Intensity
## Min. : 3.0 Min. :0.001384 Min. : 1.000 Min. : 60.00
## 1st Qu.: 19.0 1st Qu.:0.071833 1st Qu.: 5.831 1st Qu.: 76.75
## Median : 43.0 Median :0.294716 Median :10.296 Median : 83.00
## Mean :102.5 Mean :0.334197 Mean :12.869 Mean : 85.62
## 3rd Qu.:117.0 3rd Qu.:0.534672 3rd Qu.:19.105 3rd Qu.: 92.50
## Max. :621.0 Max. :1.524953 Max. :45.880 Max. :154.25
## Intensity_SD ID
## Min. : 0.0000 Length:52687
## 1st Qu.: 0.9428 Class :character
## Median : 1.7916 Mode :character
## Mean : 2.1207
## 3rd Qu.: 2.8986
## Max. :46.1976
Code used to generate Fig. 3B
Each colony id corresponds to one measured plaques. Multiple intensity and distance values exist for each colony id but only one area value. To visualize plaque areas, df has to be summarized according to area.
# Make a copy of the df to work with for the area analysis
df1=df
# Subset df1 used for area analysis by removing columns "Distance", "Intensity", and "Intensity_SD"
df1=df1[,-c(7:9)]
# Create unique identifier per colony & image
df1$ColID=paste(df1$ID,df1$Colony,df1$Image,sep="_")
# Summarize data to that only one area per colony id exists
df1=df1 %>%
group_by(ColID) %>%
summarise(across(c(Area),mean))
# Seperate unique identifier column "ColID" into individual columnse
df1=separate(df1,col=ColID,into=c("Block","Phenotype","Line","Colony", "Image"),sep="_")
# Convert Phenotype and Line into factors
df1$Phenotype=as.factor(df1$Phenotype)
df1$Line=as.factor(df1$Line)
# Create unique identifier "ID" combining Block, Phenotype, and Line
df1$ID=paste(df1$Block,df1$Phenotype,df1$Line,sep="_")
df1$ID=as.factor(df1$ID)
# Add a column with genotype information
df1$Genotype=NA
# Add genotypes
df1[df1$ID=="0_LP_WT",]$Genotype="WT"
df1[df1$ID=="1_SP_L1",]$Genotype="G322D"
df1[df1$ID=="1_SP_L4",]$Genotype="S427*"
df1[df1$ID=="A_LP_L3",]$Genotype="T101A"
# Convert Genotype into factor
df1$Genotype=as.factor(df1$Genotype)
# Rearrange Genotypes based on plaque size in decenting order
df1$Genotype <- factor(df1$Genotype , levels=c("T101A", "WT","S427*", "G322D"))
# Visualize plaque area using a combined box and violin plot (Fig. 3B)
a=ggplot(df1,aes(Genotype,Area, fill=Genotype))+
geom_violin(alpha=0.8)+ # violin plot
geom_boxplot(width=0.1, fill="white", outlier.shape=NA)+ # boxplot
scale_y_continuous(trans="log10",
labels=trans_format("log10", math_format(10^.x)),
breaks=c(1e-03,1e-02,1e-01,1e+00))+ # log scale with exponential y-ticks
scale_x_discrete(breaks=c("T101A","WT","S427*","G322D"),
labels=c("F(T101A)","Ancestor","F(S427*)","F(G322D)"))+ # Change x labels
scale_fill_manual(values=c("#ae017e","#29485d","#ef7c00","#c6d325"))+ #setting colors of violin plot
labs(x = "Genotype",
y = expression(paste("Plaque area (mm"^"2",")")))+ # labels of x and y axis
theme_bw()+ # plot layout
theme(legend.position = "none")+ # remove figure legend
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # set font size
a
# Save plot at high resolution resolution
# ggsave("plaque_area.jpeg", plot = a, dpi = 600, width = 8, height = 6)
# Summary of total number of plaques analyzed for each genotype
summary(df1)
## Block Phenotype Line Colony Image
## Length:870 LP:136 L1:303 Length:870 Length:870
## Class :character SP:734 L3: 60 Class :character Class :character
## Mode :character L4:431 Mode :character Mode :character
## WT: 76
##
##
## Area ID Genotype
## Min. :0.001384 0_LP_WT: 76 T101A: 60
## 1st Qu.:0.028572 1_SP_L1:303 WT : 76
## Median :0.051452 1_SP_L4:431 S427*:431
## Mean :0.112703 A_LP_L3: 60 G322D:303
## 3rd Qu.:0.095175
## Max. :1.524953
# F(G322D): 303
# F(S427*): 431
# F(T101A): 60
# ancestor: 76
# Calculate mean and standard error for plaque area of each genotype
## Ancestor
mean(df1[df1$Genotype=="WT",]$Area) # 0.375
## [1] 0.3749175
std.error(df1[df1$Genotype=="WT",]$Area) # 0.021
## [1] 0.02140839
## F(T101A)
mean(df1[df1$Genotype=="T101A",]$Area) #0.518
## [1] 0.5184235
std.error(df1[df1$Genotype=="T101A",]$Area) # 0.039
## [1] 0.0391119
## F(G322D)
mean(df1[df1$Genotype=="G322D",]$Area) # 0.033
## [1] 0.03278611
std.error(df1[df1$Genotype=="G322D",]$Area) # 0.001
## [1] 0.001302967
## F(S427*)
mean(df1[df1$Genotype=="S427*",]$Area) #0.066
## [1] 0.06616785
std.error(df1[df1$Genotype=="S427*",]$Area) #0.002
## [1] 0.00189727
Code for supplementary figure Fig. S4
# Make a copy of df
df2=df
# Create unique identifier per colony & image
df2$ColID=paste(df2$ID,df2$Colony,df2$Image,sep="_")
# Summarize data to that only one area per colony id exists
df2=df2 %>%
group_by(ColID) %>%
summarise(across(c(Area, Intensity),mean))
# Seperate unique identifier column "ColID" into individual columnse
df2=separate(df2,col=ColID,into=c("Block","Phenotype","Line","Colony", "Image"),sep="_")
# Convert Phenotype and Line into factors
df2$Phenotype=as.factor(df2$Phenotype)
df2$Line=as.factor(df2$Line)
# Create unique identifier "ID" combining Block, Phenotype, and Line
df2$ID=paste(df2$Block,df2$Phenotype,df2$Line,sep="_")
df2$ID=as.factor(df2$ID)
# Add a column with genotype information
df2$Genotype=NA
# Add genotypes
df2[df2$ID=="0_LP_WT",]$Genotype="Ancestor"
df2[df2$ID=="1_SP_L1",]$Genotype="F(G322D)"
df2[df2$ID=="1_SP_L4",]$Genotype="F(S427*)"
df2[df2$ID=="A_LP_L3",]$Genotype="F(T101A)"
# Convert Genotype into factor
df2$Genotype=as.factor(df2$Genotype)
# Set levels of genotypes form highest to lowest turbidty
df2$Genotype <- factor(df2$Genotype , levels=c("F(G322D)", "F(S427*)","F(T101A)", "Ancestor"))
# Plot results using a density plot
x=ggplot(df2,aes(x=Area,y=Intensity,color=Genotype)) +
geom_density_2d()+ # density plot with concentric areas
scale_x_continuous(trans="log10")+ # log transform area axis
scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#29485d"))+ # set color palette
labs(x = expression(paste(" Plaque area (mm"^"2 ",")")),
y="Turbidity")+ # labels of x- and y-axis
theme_bw()+ # plot layout
theme(legend.position = "bottom")+ # legend position
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # font size
x
# Save image at high resolution as jpeg file
# ggsave("turbidity.jpeg", plot = x, dpi = 600, width = 8, height = 6)
Code used to generate Fig. 4D in the manuscript
READ ME Specification of columns in data set
-Treatment: Treatment Filtration: Samples were sterile filtered using a 0.2 µm filter to remove bateria and phages still inside bacterial hosts
-Transfer: Transfer identity, Transfers T1, T2, T3, and T4 were 30-minute transfers after which 100 µl of phage-bacteria mix was passaged onto fresh, susceptible ancestral bacteria cells
-Line: Selection line. Genotype used to start the transfer experiment with (4 in total) - WT: ancestral wild type phage genotype, large plaque former - T101A: mutant T101A forming large plaques, evolved during 30-minute transfers in MS_002-exp_4
-CFU: colony-forming units of the ancestral E. coli C culture in mid-log phase at the moment of infection initiation
-CFU_dilution: plating dilution used to plate 100 µl of bacterial culture for CFU counting
-CFU_ml: colony-forming units in ml, calculated by CFUCFU_dilution10 (because 100 µl were plated)
-PFU_LP_1: plaque-forming units of the large-plaque phenotype for each sample of replicate plating 1
-dilution_1: plating dilution used for PFU counts, 100 µl of plating dilution was plated for plaque counting of replicate plating 1
-PFU_LP_2: plaque-forming units of the large-plaque phenotype for each sample of replicate plating 2
-dilution_2: plating dilution used for PFU counts, 100 µl of plating dilution was plated for plaque counting of replicate plating 2
-PFU_LP_3: plaque-forming units of the large-plaque phenotype for each sample of replicate plating 3
-dilution_3: plating dilution used for PFU counts, 100 µl of plating dilution was plated for plaque counting of replicate plating 3
-PFU_ml_LP1: plaque-forming units per ml of large plaque phenotype calculated as PFUdilution_110 (because 100 µl were plated) of replicate plating 1
-PFU_ml_LP2: plaque-forming units per ml of large plaque phenotype calculated as PFUdilution_110 (because 100 µl were plated) of replicate plating 2
-PFU_ml_LP3: plaque-forming units per ml of large plaque phenotype calculated as PFUdilution_110 (because 100 µl were plated) of replicate plating 3
-Median_PFU_ml: Median large plaque PFU per ml calculated based on phage concentrations of the three replicate platings
-Median_WT: Median of titering for the wild type taken from column “Median_PFU_ml” used to calculate the ration between free phages of mutant F(T101A) and the ancestor
df=read.csv("experiment_5_30min_free_phages.csv")
# df has 24 observations and 18 variables
str(df) # structure of data columns
## 'data.frame': 24 obs. of 18 variables:
## $ Replicate : int 1 1 1 1 1 1 1 1 2 2 ...
## $ Treatment : chr "Filtration" "Filtration" "Filtration" "Filtration" ...
## $ Transfer : chr "T1" "T2" "T3" "T4" ...
## $ Line : chr "T101A" "T101A" "T101A" "T101A" ...
## $ CFU : int 148 136 78 135 148 136 78 135 75 73 ...
## $ CFU_dilution : num 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 ...
## $ CFU_ml : num 1.48e+08 1.36e+08 7.80e+07 1.35e+08 1.48e+08 1.36e+08 7.80e+07 1.35e+08 7.50e+07 7.30e+07 ...
## $ PFU_LP_1 : int 14 26 194 139 26 97 168 145 53 170 ...
## $ dilution_1 : num 1e+06 1e+07 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+06 1e+07 ...
## $ PFU_LP_2 : int 70 28 23 95 60 96 15 78 49 113 ...
## $ dilution_2 : num 1e+05 1e+07 1e+07 1e+06 1e+05 1e+06 1e+07 1e+06 1e+06 1e+07 ...
## $ PFU_LP_3 : int 80 26 21 95 20 80 60 47 52 96 ...
## $ dilution_3 : num 1e+05 1e+07 1e+07 1e+06 1e+05 1e+06 1e+06 1e+06 1e+06 1e+07 ...
## $ PFU_ml1 : num 1.40e+08 2.60e+09 1.94e+09 1.39e+09 2.60e+08 ...
## $ PFU_ml2 : num 7.0e+07 2.8e+09 2.3e+09 9.5e+08 6.0e+07 ...
## $ PFU_ml3 : num 8.0e+07 2.6e+09 2.1e+09 9.5e+08 2.0e+07 8.0e+08 6.0e+08 4.7e+08 5.2e+08 9.6e+09 ...
## $ Median_PFU_ml: num 8.0e+07 2.6e+09 2.1e+09 9.5e+08 6.0e+07 ...
## $ Median_WT : num 6.0e+07 9.6e+08 1.5e+09 7.8e+08 6.0e+07 9.6e+08 1.5e+09 7.8e+08 2.9e+08 7.3e+09 ...
summary(df) # summary of data columns
## Replicate Treatment Transfer Line
## Min. :1 Length:24 Length:24 Length:24
## 1st Qu.:1 Class :character Class :character Class :character
## Median :2 Mode :character Mode :character Mode :character
## Mean :2
## 3rd Qu.:3
## Max. :3
## CFU CFU_dilution CFU_ml PFU_LP_1
## Min. : 73.0 Min. :1e+05 Min. : 73000000 Min. : 14.00
## 1st Qu.: 96.0 1st Qu.:1e+05 1st Qu.: 96000000 1st Qu.: 25.25
## Median :135.5 Median :1e+05 Median :135500000 Median : 43.50
## Mean :150.6 Mean :1e+05 Mean :150583333 Mean : 72.12
## 3rd Qu.:150.8 3rd Qu.:1e+05 3rd Qu.:150750000 3rd Qu.:109.00
## Max. :354.0 Max. :1e+05 Max. :354000000 Max. :194.00
## dilution_1 PFU_LP_2 dilution_2 PFU_LP_3
## Min. : 1000000 Min. : 15.00 Min. :1.00e+05 Min. : 15.00
## 1st Qu.: 1000000 1st Qu.: 22.75 1st Qu.:1.00e+06 1st Qu.: 25.25
## Median : 10000000 Median : 45.50 Median :1.00e+07 Median : 45.00
## Mean : 17125000 Mean : 50.96 Mean :1.78e+07 Mean : 52.54
## 3rd Qu.: 10000000 3rd Qu.: 75.00 3rd Qu.:1.00e+07 3rd Qu.: 80.00
## Max. :100000000 Max. :113.00 Max. :1.00e+08 Max. :128.00
## dilution_3 PFU_ml1 PFU_ml2
## Min. : 100000 Min. :1.400e+08 Min. :6.000e+07
## 1st Qu.: 1000000 1st Qu.:8.600e+08 1st Qu.:7.075e+08
## Median : 10000000 Median :2.270e+09 Median :2.550e+09
## Mean : 17425000 Mean :5.562e+09 Mean :5.272e+09
## 3rd Qu.: 10000000 3rd Qu.:8.100e+09 3rd Qu.:7.625e+09
## Max. :100000000 Max. :1.900e+10 Max. :2.200e+10
## PFU_ml3 Median_PFU_ml Median_WT
## Min. :2.000e+07 Min. :6.000e+07 Min. :6.000e+07
## 1st Qu.:5.075e+08 1st Qu.:7.150e+08 1st Qu.:6.575e+08
## Median :2.650e+09 Median :2.350e+09 Median :1.800e+09
## Mean :5.344e+09 Mean :5.366e+09 Mean :3.560e+09
## 3rd Qu.:9.375e+09 3rd Qu.:8.700e+09 3rd Qu.:5.425e+09
## Max. :2.300e+10 Max. :2.200e+10 Max. :1.280e+10
# Turn line into a factor
df$Line=as.factor(df$Line)
df$ratio=df$Median_PFU_ml/df$Median_WT
# for WT, ratio should be 1, check if this is true
summary(df[df$Line=="WT",]$ratio)
## Min. 1st Qu. Median Mean 3rd Qu. Max.
## 1 1 1 1 1 1
# Remove all WT (ratio value=1)
df1=df[df$Line!="WT",]
# Create a column for ratio mean and se
df1$mean=NA
df1$st.error=NA
# Mean and standard error of all transfer T1 for three replicates
mean(df1[df1$Transfer=="T1",]$ratio) #1.51
## [1] 1.505914
std.error(df1[df1$Transfer=="T1",]$ratio) #0.14
## [1] 0.1445667
## add values to table
df1[df1$Transfer=="T1",]$mean=1.51
df1[df1$Transfer=="T1",]$st.error=0.14
# Mean and standard error of all transfer T2 for three replicates
mean(df1[df1$Transfer=="T2",]$ratio) #1.99
## [1] 1.991676
std.error(df1[df1$Transfer=="T2",]$ratio) #0.36
## [1] 0.3617051
## add values to table
df1[df1$Transfer=="T2",]$mean=1.99
df1[df1$Transfer=="T2",]$st.error=0.36
# Mean and standard error of all transfer T3 for three replicates
mean(df1[df1$Transfer=="T3",]$ratio) #1.84
## [1] 1.836898
std.error(df1[df1$Transfer=="T3",]$ratio) #0.22
## [1] 0.2184878
## add values to table
df1[df1$Transfer=="T3",]$mean=1.84
df1[df1$Transfer=="T3",]$st.error=0.22
# Mean and standard error of all transfer T4 for three replicates
mean(df1[df1$Transfer=="T4",]$ratio) #2.47
## [1] 2.477411
std.error(df1[df1$Transfer=="T4",]$ratio) #0.88
## [1] 0.8779285
## add values to table
df1[df1$Transfer=="T4",]$mean=2.47
df1[df1$Transfer=="T4",]$st.error=0.88
# Add transfer as individual number
df1$no_transfer=NA
df1[df1$Transfer=='T1',]$no_transfer=1
df1[df1$Transfer=='T2',]$no_transfer=2
df1[df1$Transfer=='T3',]$no_transfer=3
df1[df1$Transfer=='T4',]$no_transfer=4
# Visualize phage ratios compared to wild type in a bar plot (Fig. 4D)
a=ggplot(df1,aes(no_transfer, mean,fill=Line))+
geom_bar(stat="identity", position=position_dodge())+ # bar chart vertical
geom_errorbar(aes(ymin=mean-st.error, ymax=mean+st.error), width=.2,
position=position_dodge(.9))+ # add error bars
scale_fill_manual(values=c("#ae017e"))+ # set colors of bars
geom_hline(yintercept=1,linetype="dashed", color="black")+ # horizontal line at 1 (indicating ratio frequency of wild type )
xlab("Transfer")+ # label x-axis
ylab("Relative free phage particles after 30 mins")+ # label y-axis
theme_bw()+ # plot layout
theme(legend.position = "none")+
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # remove legend and set font size
a
# Save plot at high resolution (Fig 4 D)
#ggsave("free_phages.jpeg", plot = a, dpi = 600, width = 8, height = 6)
Code used to generate Fig. 5B and Fig. S6 in the manuscript
READ ME Specification of columns in data set
Sampling_time_min: Sampling time in minutes, phages were extracted from samples using chloroform
Genotype: Genotypes of phage isolates (Mutations all in protein F) - WT: wild type large plaque former - T101A: large plaque mutant evolved from WT during 40x 30-min transfers - G322D: small plaque mutant evolved from WT using 3 hour transfers - S427*: small plaque mutant evolved from WT using 3 hour transfers
CFU: colony-forming units of E. coli C mid-log phase culture used for phage infection
Dilution_CFU: 10 fold dilution step used for CFU plating
CFU_ml: Colony-forming units per ml calculated by CFUCFU_dilution10; factor *10 because 100 µl of CFU_dilution was plated
PFU: Plaque-forming units
Dilution_PFU: 10 fold dilution step used for PFU plating
PFU_ml: Plaque-forming units per ml calculated by PFUPFU_dilution10; factor *10 because 100 µl of PFU_dilution was plated
df=read.csv("experiment_6_decay_analysis.csv")
# df contains 40 observations of 8 variables
str(df) # structure of columns
## 'data.frame': 40 obs. of 8 variables:
## $ Sampling_time_min: int 30 30 30 30 60 60 60 60 90 90 ...
## $ Genotype : chr "WT" "G322D" "S427*" "T101A" ...
## $ CFU : int 108 108 108 108 108 108 108 108 108 108 ...
## $ Dilution_CFU : num 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 1e+05 ...
## $ CFU_ml : num 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 1.08e+08 ...
## $ PFU : int 62 72 245 101 81 128 138 48 40 147 ...
## $ Dilution_PFU : num 1e+07 1e+07 1e+07 1e+06 1e+08 1e+08 1e+08 1e+08 1e+08 1e+08 ...
## $ PFU_ml : num 6.20e+09 7.20e+09 2.45e+10 1.01e+09 8.10e+10 ...
summary(df) #summary of columns
## Sampling_time_min Genotype CFU Dilution_CFU
## Min. : 30 Length:40 Min. :108 Min. :1e+05
## 1st Qu.: 90 Class :character 1st Qu.:108 1st Qu.:1e+05
## Median :165 Mode :character Median :108 Median :1e+05
## Mean :165 Mean :108 Mean :1e+05
## 3rd Qu.:240 3rd Qu.:108 3rd Qu.:1e+05
## Max. :300 Max. :108 Max. :1e+05
## CFU_ml PFU Dilution_PFU PFU_ml
## Min. :1.08e+08 Min. : 15.00 Min. : 1000000 Min. :1.010e+09
## 1st Qu.:1.08e+08 1st Qu.: 48.00 1st Qu.: 10000000 1st Qu.:5.975e+09
## Median :1.08e+08 Median : 83.50 Median :100000000 Median :4.400e+10
## Mean :1.08e+08 Mean : 97.25 Mean : 65350000 Mean :6.710e+10
## 3rd Qu.:1.08e+08 3rd Qu.:139.25 3rd Qu.:100000000 3rd Qu.:1.190e+11
## Max. :1.08e+08 Max. :245.00 Max. :100000000 Max. :2.230e+11
# Convert genotype into factor
df$Genotype=as.factor(df$Genotype)
# Add plaque phenotype column to the df frame
df$Phenotype=NA
df[df$Genotype=="WT",]$Phenotype="LP"
df[df$Genotype=="T101A",]$Phenotype="LP"
df[df$Genotype=="G322D",]$Phenotype="SP"
df[df$Genotype=="S427*",]$Phenotype="SP"
# Convert phenotype into a factor
df$Phenotype=as.factor(df$Phenotype)
# Add Genotypes as Labels including the protein in which mutation occurs
df$Label=NA
df[df$Genotype=="G322D",]$Label="F(G322D)"
df[df$Genotype=="S427*",]$Label="F(S427*)"
df[df$Genotype=="T101A",]$Label="F(T101A)"
df[df$Genotype=="WT",]$Label="Ancestor"
# Convert Label into factor
df$Label=as.factor(df$Label)
(Supplementary Figure S6)
# Rearrange Label to the corresponding color code
df$Label <- factor(df$Label , levels=c("F(G322D)", "F(S427*)","F(T101A)", "Ancestor"))
# Plotting population dynamics throughout infection period
x=ggplot(df,aes(Sampling_time_min,PFU_ml,group=Label, color=Label))+
geom_point(size=3)+ # dotplot
geom_smooth(method="loess", se=F, linetype="dashed")+ #model fit loess
scale_y_continuous(trans="log10",
labels=trans_format("log10", math_format(10^.x)),
breaks=c(1e+09,1e+10,1e+11) )+ #y-axis log-transformation
scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#252525"))+ # setting colors
xlab("Infection period (min)")+ # label x-axis
ylab("Infectious phage particles per ml")+ # label y-axis
theme_bw()+ # plot layout
theme(legend.position = "bottom")+ # legend position
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # font size
x
## `geom_smooth()` using formula = 'y ~ x'
# Save plot as high resolution jpeg file
# ggsave("supplementary_pop_dyn.jpeg", plot = x, dpi = 600, width = 8, height = 6)
(Figure Fig 5B)
# Determine peak titer for each genotype and the time point of the peak
df$Peak=NA
df[df$Genotype=="WT",]$Peak=8.10e+10 # peak at 60 min p.i.
df[df$Genotype=="T101A",]$Peak=4.80e+10 #peak at 60 min p.i
df[df$Genotype=="G322D",]$Peak=1.83e+11 # peak at 120 min p.i.
df[df$Genotype=="S427*",]$Peak=2.23e+11 # peak at 120 min p.i.
# Calculate ratio between each time step and peak
df$Ratio=df$PFU_ml/df$Peak
# Create unique identifier combining sampling time and genotype
df$ID=paste(df$Sampling_time_min,df$Genotype,sep="_")
# Plot growth and decay ratios using a dot plot
ggplot(df,aes(Sampling_time_min,Ratio,group=Genotype,color=Genotype))+
geom_point(size=3)+ # dot plot
scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#252525"))+ # set color palette
xlab("Infection period (min)")+ # label x-axis
ylab("Ratio compared to titer peak")+ # label y-axis
theme_bw()+ # plot layout
theme(legend.position = "bottom") # legend position
# Remove time points between start and peak phage titers to only capture decay period
data1=df[df$Sampling_time_min!=30,]
data1=data1[data1$ID!="60_G322D",]
data1=data1[data1$ID!="60_S427*",]
data1=data1[data1$ID!="90_G322D",]
data1=data1[data1$ID!="90_S427*",]
# Plot ratios during the decay period
ggplot(data1[data1$Sampling_time_min<310,],aes(Sampling_time_min,Ratio,group=Genotype,color=Genotype))+
geom_point(size=2.5)+ # dot plot
scale_color_manual(values=c("#c6d325","#ef7c00","#ae017e","#252525"))+ # set color palette
xlab("Infection period (min)")+ # label x-axis
ylab("Remaining fraction of infectious phage particles")+ # label y-axis
theme_bw()+ # plot layout
theme(legend.position = "bottom") # legend position
# ==> Phage decay seems to follow an exponential decay function
# Plot WT decay using a dotplot
ggplot(data1[data1$Genotype=="WT",], aes(Sampling_time_min,Ratio))+
geom_point()
# x and y vector extracted from the WT dataset for model fitting
x1=data1[data1$Genotype=="WT",]$Sampling_time_min
y1=data1[data1$Genotype=="WT",]$Ratio
# Compare Linear model with exponential decay model
## Linear model
a1=lm(y1~x1)
## Exponential model
### Select appropriate starting values: Select an approximate $\theta$, since theta must be lower than min(y), and greater than zero
theta.0 <- min(y1) * 0.5
### Estimate the rest parameters using a linear model
model.0 <- lm(log(y1 - theta.0) ~ x1)
alpha.0 <- exp(coef(model.0)[1])
beta.0 <- coef(model.0)[2]
### Starting parameters
start<-list(alpha = alpha.0, beta = beta.0)
a2<- nls(y1 ~ alpha * exp(beta * x1) , start = start)
## Summary list AIC and BIC for model comparison
AIC(a1,a2)
## df AIC
## a1 3 0.3392432
## a2 3 -18.7277255
# df AIC
#a1 3 0.3392432
#a2 3 -18.7277255
BIC(a1,a2)
## df BIC
## a1 3 0.9309169
## a2 3 -18.1360518
# df BIC
#a1 3 0.9309169
#a2 3 -18.1360518
# ==> model a2 is best fit
## Plot predicted and measured values
plot(x1,y1)
lines(x1, predict(a2, list(x = x1)), col = 'skyblue', lwd = 3)
## Get parameter estimates
summary(a2)
##
## Formula: y1 ~ alpha * exp(beta * x1)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## alpha 3.483344 0.708369 4.917 0.001719 **
## beta -0.021112 0.002708 -7.797 0.000107 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06946 on 7 degrees of freedom
##
## Number of iterations to convergence: 5
## Achieved convergence tolerance: 2.277e-06
# alpha; 3.5, beta -0.021
# Model checking
## creat an environment in which the model checking plots are saved at high resolution
#output_file <- "decay_WT_fitting.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)
# setting plotting environment
par(mfrow=c(1,2))
# qqplot
qqPlot(residuals(a2))
## [1] 4 3
### Normality of residuals
plot(residuals(a2)~fitted(a2))
abline(a=0, b=0, col="red")
#dev.off()
# Visualize decay period using a dotplot
ggplot(data1[data1$Genotype=="T101A",], aes(Sampling_time_min,Ratio))+
geom_point()
# x and y vector extracted from the WT dataset for model fitting
x2=data1[data1$Genotype=="T101A",]$Sampling_time_min
y2=data1[data1$Genotype=="T101A",]$Ratio
# Model selection
## Linear model
b1=lm(y2~x2)
## Exponential decay model
###Select appropriate starting values; Select an approximate $\theta$, since theta must be lower than min(y), and greater than zero
theta.0 <- min(y2) * 0.5
### Estimate the rest parameters using a linear model
model.0 <- lm(log(y2 - theta.0) ~ x2)
alpha.0 <- exp(coef(model.0)[1])
beta.0 <- coef(model.0)[2]
### Starting parameters
start=list(alpha = alpha.0, beta = beta.0)
b2 <- nls(y2 ~ alpha * exp(beta * x2), start = start)
### Select model based on lower AIC and BIC value
## Combine all AICs and BICs
AIC(b1,b2)
## df AIC
## b1 3 0.08641326
## b2 3 -12.04236649
# df AIC
# b1 3 0.08641326
# b2 3 -12.04236649
BIC(b1,b2)
## df BIC
## b1 3 0.678087
## b2 3 -11.450693
# df BIC
# b1 3 0.678087
# b2 3 -11.450693
# ==> Select model b2 due to lower AIC and BIC
# Plot fitted curve and data
plot(x2,y2)
lines(x2, predict(b2, list(x = x2)), col = 'skyblue', lwd = 3)
summary(b2)
##
## Formula: y2 ~ alpha * exp(beta * x2)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## alpha 3.078747 0.904802 3.403 0.01140 *
## beta -0.019946 0.003831 -5.206 0.00124 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1007 on 7 degrees of freedom
##
## Number of iterations to convergence: 9
## Achieved convergence tolerance: 7.025e-06
# alpha: 3.08, beta= -0.0199
# Model checking for b3
## Create an environment in which the model validation plots are safed at high resolution
#output_file <- "decay_T101A_fitting.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)
## Set up plot window
par(mfrow=c(1,2))
## QQPlot
qqPlot(residuals(b2))
## [1] 2 6
## Residuals vs fitted values
plot(residuals(b2)~fitted(b2))
abline(a=0, b=0, col="red")
#dev.off()
# Visualize decay using a dotplot
ggplot(data1[data1$Genotype=="G322D",], aes(Sampling_time_min,Ratio))+
geom_point()
# Remove outlier t210
x3=data1[data1$Genotype=="G322D" & data1$Sampling_time_min!=210,]$Sampling_time_min
y3=data1[data1$Genotype=="G322D" & data1$Sampling_time_min!=210,]$Ratio
# Model comparison between linear and exponential decay model
## Linear model
c1=lm(y3~x3)
## exponential model
### Select appropriate starting values
### Select an approximate $\theta$, since theta must be lower than min(y), and greater than zero
theta.0 <- min(y3) * 0.5
### Estimate the rest parameters using a linear model
model.0 <- lm(log(y3 - theta.0) ~ x3)
alpha.0 <- exp(coef(model.0)[1])
beta.0 <- coef(model.0)[2]
### Starting parameters
start <- list(alpha = alpha.0, beta = beta.0)
c2 <- nls(y3 ~ alpha * exp(beta * x3) , start = start)
### Summary all AICs and BICs
AIC(c1,c2)
## df AIC
## c1 3 -7.792185
## c2 3 -11.601528
# df AIC
# c1 3 -7.792185
# c2 4 -11.675159
BIC(c1,c2)
## df BIC
## c1 3 -8.416907
## c2 3 -12.226250
# df BIC
#c1 3 -8.416907
#c2 4 -12.2262
# ==> Model c2 selected based on lower AIC and BIC
## Plot predicted vs measured values
plot(x3,y3)
lines(x3, predict(c2, list(x = x3)), col = 'skyblue', lwd = 3)
# Extract parameter estimates
summary(c2)
##
## Formula: y3 ~ alpha * exp(beta * x3)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## alpha 2.091772 0.308667 6.777 0.00247 **
## beta -0.006480 0.000862 -7.518 0.00168 **
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.06836 on 4 degrees of freedom
##
## Number of iterations to convergence: 5
## Achieved convergence tolerance: 3.111e-06
## alpha: 2.09, beta: -0.0065
# Model validation for c2
## Creat an evironment in which the validation plot can be saved at high resolution
#output_file <- "decay_G322D_fitting.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)
# Plot setup
par(mfrow=c(1,2))
## qqplot
qqPlot(residuals(c2))
## [1] 5 3
## residuals vs fitted values plot
plot(residuals(c2)~fitted(c2))
abline(a=0, b=0, col="red")
#dev.off()
# Visualize decay using a dotplot
ggplot(data1[data1$Genotype=="S427*",], aes(Sampling_time_min,Ratio))+
geom_point()
# remove outlier t210
x4=data1[data1$Genotype=="S427*"&data1$Sampling_time_min!="210",]$Sampling_time_min
y4=data1[data1$Genotype=="S427*"& data1$Sampling_time_min!="210",]$Ratio
plot(x4,y4)
# Model selection between linear and exponential decay model
## Linear model
d1=lm(y4~x4)
## Exponential decay model: Select appropriate starting values; Select an approximate $\theta$, since theta must be lower than min(y), and greater than zero
theta.0 <- min(y4) * 0.5
### Estimate the rest parameters using a linear model
model.0 <- lm(log(y4 - theta.0) ~ x4)
alpha.0 <- exp(coef(model.0)[1])
beta.0 <- coef(model.0)[2]
### Starting parameters
start <- list(alpha = alpha.0, beta = beta.0)
d2 <- nls(y4 ~ alpha * exp(beta * x4) , start = start)
### Model comparison to chose the best fit model using AIC and BIC
AIC(d1,d2)
## df AIC
## d1 3 -3.548757
## d2 3 -5.901902
# df AIC
#d1 3 -3.548757
#d2 3 -5.901902
BIC(d1,d2)
## df BIC
## d1 3 -4.173478
## d2 3 -6.526623
# df BIC
# d1 3 -4.173478
# d2 3 -6.526623
# ==> Model d2 selected based on the lower AIC and BIC
## Extract parameter estimates
summary(d2)
##
## Formula: y4 ~ alpha * exp(beta * x4)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## alpha 1.745038 0.407150 4.286 0.0128 *
## beta -0.005442 0.001313 -4.146 0.0143 *
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 0.1099 on 4 degrees of freedom
##
## Number of iterations to convergence: 6
## Achieved convergence tolerance: 2.651e-06
## alpha: 1.75, beta: -0.0054
# Model validation for d2
## Create an evironment to save validation plots at high resolution
#output_file <- "decay_S427_fitting.jpeg"
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)
## Plot layout
par(mfrow=c(1,2))
## qqplot
qqPlot(residuals(d2))
## [1] 3 6
## residuals vs fitted values plot
plot(residuals(d2)~fitted(d2))
abline(a=0, b=0, col="red")
#dev.off()
(Figure Fig. 5B)
# Visualize decay data using dot and line plots
a=ggplot()+
geom_point(aes(x=x1,y=y1),color="#252525")+ # measured decay ancestor
geom_point(aes(x=x2,y=y2),color="#ae017e")+ # measured decay F(T101A)
geom_point(aes(x=x3,y=y3),color="#c6d325")+ # measured decay F(G322D)
geom_point(aes(x=x4,y=y4),color="#ef7c00")+ # measured decay F(S427*)
annotate(geom="text", x=250, y=1.00, hjust=0, label=expression("Decay rate (min"^{-1}*"):"),
color="black", fontface="bold", size=5)+ # Decay rate text
annotate(geom="text", x=250, y=0.92, hjust=0,label=expression("F(G322D): 6.5*10"^{-3}*""),
color="#c6d325", fontface="bold", size=5)+ # Decay rate text F(G322D)
annotate(geom="text", x=250, y=0.85, hjust=0,label=expression("F(S427*): 5.4*10"^{-3}*""),
color="#ef7c00", fontface="bold", size=5)+# Decay rate text F(S427*)
annotate(geom="text", x=250, y=0.78, hjust=0,label=expression("F(T101A): 2.0*10"^{-2}*""),
color="#ae017e", fontface="bold", size=5)+# Decay rate text F(T101A)
annotate(geom="text", x=250, y=0.71, hjust=0,label=expression("Ancestor: 2.1*10"^{-2}*""),
color="#252525", fontface="bold", size=5)+ # Decay rate text ancestor
geom_line(aes(x=x1, y=predict(a2)),color="#252525", size=0.75)+ # predicted decay ancestor
geom_line(aes(x=x2, y=predict(b2)),color="#ae017e", size=0.75)+ # predicted decay F(T101A)
geom_line(aes(x=x3, y=predict(c2)),color="#c6d325", linetype="dashed",size=0.75)+ # predicted decay F(G322D)
geom_line(aes(x=x4, y=predict(d2)),color="#ef7c00", linetype="dashed",size=0.75)+ # predicted decay F(S427*)
xlab("Elapsed time since infection (min)")+ # label x-axis
ylab("Remaining fraction of infectious phage particles")+ # label y-axis
theme_bw()+ # plot layout
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # font size
## Warning: Using `size` aesthetic for lines was deprecated in ggplot2 3.4.0.
## ℹ Please use `linewidth` instead.
## This warning is displayed once every 8 hours.
## Call `lifecycle::last_lifecycle_warnings()` to see where this warning was
## generated.
a
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
## Warning in is.na(x): is.na() applied to non-(list or vector) of type
## 'expression'
# Save plot at high resolution
# ggsave("decay_plot.jpeg", plot = a, dpi = 600, width = 9, height = 7)
Code used to generate Fig. S3 in the manuscript
READ ME Specification of columns in data set
Genotype: Phage genotype for which OSGC was done: wild type (WT)
MOI: multiplicity of infectiousness input when adsorption is initiated, for all runs 0.1
Sampling_time_min: Sampling time point in minutes after the 6 minute synchronized adsorption period followed by a 2000x dilution step
PFU_1: Plaque-forming unit counts (PFU) for first plating of triplicate plating for lysate
PFU_2: Plaque-forming unit counts (PFU) for second plating of triplicate plating for lysate
PFU_3: Plaque-forming unit counts (PFU) for third plating of triplicate plating for lysate
Median_PFU: Median PFU counts from triplicate plating PFU_1, PFU_2, PFU_3. If triplicate plating was not possible, mean of two platings or just one plating value was taken for phage titer calculation. Reason for this was not enough sampling volume to do triplicate plating (i.e. at early time points).
Dilution_PFU: Plating dilution used to plate phage lysate isolated after the one-step growth curve using chloroform in a plaque assay
Dilution_OSGC: Dilution step between adsorption and OSGC sampling to prevent secondary infection.The dilution step for all three one-step runs was a 2000-fold dilution using 10 µl of bacteria-phage mix after adsorption and dilute it into 20 ml supplemented, prewarmed (37C) LB-medium.
PFU_ml: Phage concentration in Plaque-forming untis per ml (PFU/ml), calculated from Median_PFU *Dilution_PFU
t0_PFU_ml: Phage concentration in PFU/ml at time point t0 after adsorption. These are free phages which need to be subtracted from each following time point phage titer to normalize the data for free/unadsorbed phages.
PFU_t0_norm: Normalized PFU concentration for each time point calculated by PFU_ml - t0_PFU_ml
Bacteria_CFU_ml: Bacteria concentration in colony-forming units per ml CFU/ml) at the start of the adsorption period. Calculated by counting colony-forming units (CFU)* Plating dilution (10^6)
Infected_CFU_ml: Number of Bacteria which could be in theory infected by phages calculated by Bacteria_CFU_ml*MOI/Dilution_OSGC
PFU_ml_produced: Mean phage burst size at each sampling time point
Elapsed_time: elased time since adsorption. Calculated by adding 6 min adsorption to each “Sampling_time_ min” OSGC sampling time point
df=read.csv("experiment_7_OSGC_WT.csv")
# df has 11 observations and 16 variables
str(df) # structure of columns
## 'data.frame': 11 obs. of 16 variables:
## $ Genotype : chr "WT" "WT" "WT" "WT" ...
## $ MOI : num 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 0.1 ...
## $ Sampling_time_min: int 1 2 3 4 5 6 8 10 15 20 ...
## $ PFU_1 : num 17 26 6 12 134 ...
## $ PFU_2 : int 11 18 NA 11 148 119 23 33 15 16 ...
## $ PFU_3 : int NA NA NA NA 136 129 14 65 18 21 ...
## $ Median_PFU : num 14 22 6 11.5 136 129 23 65 18 21 ...
## $ Dilution_PFU : num 2.5 2.5 3.3 5.0 1.0e+01 1.0e+02 1.0e+04 1.0e+04 1.0e+05 1.0e+05 ...
## $ Dilution_OSGC : int 2000 2000 2000 2000 2000 2000 2000 2000 2000 2000 ...
## $ PFU_ml : num 3.50e+01 5.50e+01 1.98e+01 5.75e+01 1.36e+03 1.29e+04 2.30e+05 6.50e+05 1.80e+06 2.10e+06 ...
## $ t0_PFU_ml : num 35 35 35 35 35 35 35 35 35 35 ...
## $ PFU_t0_norm : num 0 20 -15.2 22.5 1330 12900 230000 650000 1800000 2100000 ...
## $ Bacteria_CFU_ml : num 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 2.81e+08 ...
## $ Infected_CFU_ml : num 14100 14100 14100 14100 14100 14100 14100 14100 14100 14100 ...
## $ PFU_ml_produced : num 0 0 0 0 0.09 ...
## $ Elapsed_time : int 7 8 9 10 11 12 14 16 21 26 ...
ggplot(df, aes(Elapsed_time,PFU_ml_produced))+
geom_point(size=2)+ # dot plot
scale_y_continuous(trans = scales::pseudo_log_trans(),
breaks = c(0, 10^(0:6)))+ # log transformed y axis
xlim(0,35)+ # set x limits so time point 0-6 min (adsorption time) are also visualized
theme_bw() # plot layout
# Subset data frame to columns "Elapsed time" and "PFU_ml_produced"
WT=df[,c(16,15)]
## For model fitting, first compare exponential growth with logistic growth model to determine "best-fit" model
### Exponential increase model (beta= constant, alpha=growth rate)
model1<-nls(PFU_ml_produced~(beta+exp(alpha*Elapsed_time)),
data=WT, start=list(beta=max(WT$PFU_ml_produced),alpha=0.5))
### Logistic growth model
model2 <- nls(PFU_ml_produced ~ SSlogis(Elapsed_time, b, l, scal), data=WT,
start = list(b=max(WT$PFU_ml_produced),l=mean(WT$Elapsed_time),scal=1))
## Model comparison based on AIC and BIC
AIC(model1,model2)
## df AIC
## model1 3 117.03264
## model2 4 73.82167
# df AIC
#model1 3 117.03
#model2 4 73.82
BIC(model1,model2)
## df BIC
## model1 3 118.22633
## model2 4 75.41325
#df BIC
#model1 3 118.23
#model2 4 75.41
### --> Select model 2 [logistic growth] due to the lower AIC and BIC value ###
## Extract parameters
summary(model2)
##
## Formula: PFU_ml_produced ~ SSlogis(Elapsed_time, b, l, scal)
##
## Parameters:
## Estimate Std. Error t value Pr(>|t|)
## b 138.0512 3.8516 35.842 4.02e-10 ***
## l 17.0011 0.3487 48.757 3.46e-11 ***
## scal 1.4321 0.2583 5.545 0.000544 ***
## ---
## Signif. codes: 0 '***' 0.001 '**' 0.01 '*' 0.05 '.' 0.1 ' ' 1
##
## Residual standard error: 5.653 on 8 degrees of freedom
##
## Number of iterations to convergence: 6
## Achieved convergence tolerance: 8.576e-06
#Parameters:
#b 138.0512 3.8516 35.842 4.02e-10 ***
#l 17.0011 0.3487 48.757 3.46e-11 ***
## Results##
# burst size b = 138, lysis time l= 17 minutes
## Model validation using a qqplot and residuals vs fitted plot (Supplementary S3 B and C)
# Set the output file name to export plots at high resolution
output_file <- "model_check_OSGC.jpeg"
# Open a PNG device
#jpeg(filename = output_file, width = 8, height = 4, units = "in", res = 600)
#par(mfrow=c(1,2))
qqPlot(residuals(model2)) # looks good
## [1] 10 11
plot(residuals(model2)~fitted(model2))
abline(a=0, b=0, col="red") # ok
#dev.off()
## Visualize model fit for one-step growth curve (Supplementary S3 A)
x=ggplot()+
geom_point(data=WT,aes(x=Elapsed_time,y=PFU_ml_produced), color="#252525")+ #raw data of produced phages per time step
geom_line(data=WT,aes(x=Elapsed_time,y=predict(model2)),color="#252525", linetype="dashed",size=0.8)+ # model fit of model 2
annotate(geom="text", x=6, y=140, hjust=0,label="lysis time: 17 min",
color="black")+ # add lysis time value as text
annotate(geom="text", x=6, y=130, hjust=0,label="burst size: 138 per host cell",
color="black")+ # add burst size value as text
xlab("Elased time since adsorption initiation (min)")+ # label x-axis
ylab("Produced infectious phage particles per ml")+ # label y-axis
theme_bw()+ # plot layout
theme(axis.text=element_text(size=12),
axis.title=element_text(size=14)) # font size
x
# Export one step plot at high resolution
# ggsave("OSGC_WT.jpeg", plot = x, device = "jpeg", dpi = 600, width = 8, height = 6)